diff --git a/.codegen.json b/.codegen.json index 8cb42b415..077d072bb 100644 --- a/.codegen.json +++ b/.codegen.json @@ -11,10 +11,10 @@ "toolchain": { "required": ["go"], "post_generate": [ - "go run ./bundle/internal/bundle/schema/main.go ./bundle/schema/docs/bundle_descriptions.json", + "go run ./bundle/internal/schema/*.go ./bundle/schema/jsonschema.json", "echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes", "echo 'go.sum linguist-generated=true' >> ./.gitattributes", - "echo 'bundle/schema/docs/bundle_descriptions.json linguist-generated=true' >> ./.gitattributes" + "echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes" ] } } diff --git a/.gitattributes b/.gitattributes index d82ab7696..f35c4f81d 100755 --- a/.gitattributes +++ b/.gitattributes @@ -120,4 +120,4 @@ cmd/workspace/workspace-conf/workspace-conf.go linguist-generated=true cmd/workspace/workspace/workspace.go linguist-generated=true bundle/internal/tf/schema/\*.go linguist-generated=true go.sum linguist-generated=true -bundle/schema/docs/bundle_descriptions.json linguist-generated=true +bundle/schema/jsonschema.json linguist-generated=true diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 08edfb9da..ee60da9da 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -33,7 +33,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 - name: Setup Python uses: actions/setup-python@v5 @@ -68,7 +68,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # No need to download cached dependencies when running gofmt. cache: false @@ -100,18 +100,25 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # Github repo: https://github.com/ajv-validator/ajv-cli - name: Install ajv-cli run: npm install -g ajv-cli@5.0.0 # Assert that the generated bundle schema is a valid JSON schema by using - # ajv-cli to validate it against a sample configuration file. + # ajv-cli to validate it against bundle configuration files. # By default the ajv-cli runs in strict mode which will fail if the schema # itself is not valid. Strict mode is more strict than the JSON schema # specification. See for details: https://ajv.js.org/options.html#strict-mode-options - name: Validate bundle schema run: | go run main.go bundle schema > schema.json - ajv -s schema.json -d ./bundle/tests/basic/databricks.yml + + for file in ./bundle/internal/schema/testdata/pass/*.yml; do + ajv test -s schema.json -d $file --valid + done + + for file in ./bundle/internal/schema/testdata/fail/*.yml; do + ajv test -s schema.json -d $file --invalid + done diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index defd1c535..6a601a5f9 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -21,7 +21,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # The default cache key for this action considers only the `go.sum` file. # We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 531fb39bf..f9742a19d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,7 +22,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # The default cache key for this action considers only the `go.sum` file. # We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/CHANGELOG.md b/CHANGELOG.md index d63831253..32a7e5cfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Version changelog +## [Release] Release v0.228.1 + +Bundles: + * Added listing cluster filtering for cluster lookups ([#1754](https://github.com/databricks/cli/pull/1754)). + * Expand library globs relative to the sync root ([#1756](https://github.com/databricks/cli/pull/1756)). + * Fixed generated YAML missing 'default' for empty values ([#1765](https://github.com/databricks/cli/pull/1765)). + * Use periodic triggers in all templates ([#1739](https://github.com/databricks/cli/pull/1739)). + * Use the friendly name of service principals when shortening their name ([#1770](https://github.com/databricks/cli/pull/1770)). + * Fixed detecting full syntax variable override which includes type field ([#1775](https://github.com/databricks/cli/pull/1775)). + +Internal: + * Pass copy of `dyn.Path` to callback function ([#1747](https://github.com/databricks/cli/pull/1747)). + * Make bundle JSON schema modular with `` ([#1700](https://github.com/databricks/cli/pull/1700)). + * Alias variables block in the `Target` struct ([#1748](https://github.com/databricks/cli/pull/1748)). + * Add end to end integration tests for bundle JSON schema ([#1726](https://github.com/databricks/cli/pull/1726)). + * Fix artifact upload integration tests ([#1767](https://github.com/databricks/cli/pull/1767)). + +API Changes: + * Added `databricks quality-monitors regenerate-dashboard` command. + +OpenAPI commit d05898328669a3f8ab0c2ecee37db2673d3ea3f7 (2024-09-04) +Dependency updates: + * Bump golang.org/x/term from 0.23.0 to 0.24.0 ([#1757](https://github.com/databricks/cli/pull/1757)). + * Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 ([#1761](https://github.com/databricks/cli/pull/1761)). + * Bump golang.org/x/text from 0.17.0 to 0.18.0 ([#1759](https://github.com/databricks/cli/pull/1759)). + * Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 ([#1760](https://github.com/databricks/cli/pull/1760)). + ## [Release] Release v0.228.0 CLI: diff --git a/bundle/artifacts/expand_globs_test.go b/bundle/artifacts/expand_globs_test.go index c9c478448..1665a4806 100644 --- a/bundle/artifacts/expand_globs_test.go +++ b/bundle/artifacts/expand_globs_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -36,7 +37,7 @@ func TestExpandGlobs_Nominal(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( @@ -77,7 +78,7 @@ func TestExpandGlobs_InvalidPattern(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( @@ -125,7 +126,7 @@ func TestExpandGlobs_NoMatches(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 78648dfd7..f533c4d18 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -38,8 +38,11 @@ type Bundle struct { // Annotated readonly as this should be set at the target level. Mode Mode `json:"mode,omitempty" bundle:"readonly"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. + ClusterId string `json:"cluster_id,omitempty"` // Deployment section specifies deployment related configuration for bundle Deployment Deployment `json:"deployment,omitempty"` diff --git a/bundle/config/generate/job.go b/bundle/config/generate/job.go index 28bc86412..6cd7c1b32 100644 --- a/bundle/config/generate/job.go +++ b/bundle/config/generate/job.go @@ -25,6 +25,20 @@ func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) { value["tasks"] = dyn.NewValue(tasks, []dyn.Location{{Line: jobOrder.Get("tasks")}}) } + // We're processing job.Settings.Parameters separately to retain empty default values. + if len(job.Settings.Parameters) > 0 { + params := make([]dyn.Value, 0) + for _, parameter := range job.Settings.Parameters { + p := map[string]dyn.Value{ + "name": dyn.NewValue(parameter.Name, []dyn.Location{{Line: 0}}), // We use Line: 0 to ensure that the name goes first. + "default": dyn.NewValue(parameter.Default, []dyn.Location{{Line: 1}}), + } + params = append(params, dyn.NewValue(p, []dyn.Location{})) + } + + value["parameters"] = dyn.NewValue(params, []dyn.Location{{Line: jobOrder.Get("parameters")}}) + } + return yamlsaver.ConvertToMapValue(job.Settings, jobOrder, []string{"format", "new_cluster", "existing_cluster_id"}, value) } diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 28d015c10..27af82e54 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -160,6 +160,21 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // the Databricks UI and via the SQL API. } + // Clusters: Prefix, Tags + for _, c := range r.Clusters { + c.ClusterName = prefix + c.ClusterName + if c.CustomTags == nil { + c.CustomTags = make(map[string]string) + } + for _, tag := range tags { + normalisedKey := b.Tagging.NormalizeKey(tag.Key) + normalisedValue := b.Tagging.NormalizeValue(tag.Value) + if _, ok := c.CustomTags[normalisedKey]; !ok { + c.CustomTags[normalisedKey] = normalisedValue + } + } + } + return nil } diff --git a/bundle/config/mutator/compute_id_compat.go b/bundle/config/mutator/compute_id_compat.go new file mode 100644 index 000000000..3afe02e9e --- /dev/null +++ b/bundle/config/mutator/compute_id_compat.go @@ -0,0 +1,87 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type computeIdToClusterId struct{} + +func ComputeIdToClusterId() bundle.Mutator { + return &computeIdToClusterId{} +} + +func (m *computeIdToClusterId) Name() string { + return "ComputeIdToClusterId" +} + +func (m *computeIdToClusterId) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // The "compute_id" key is set; rewrite it to "cluster_id". + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.NewPath(dyn.Key("bundle"))) + diags = diags.Extend(d) + + // Check if the "compute_id" key is set in any target overrides. + return dyn.MapByPattern(v, dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.Path{}) + diags = diags.Extend(d) + return v, nil + }) + }) + + diags = diags.Extend(diag.FromErr(err)) + return diags +} + +func rewriteComputeIdToClusterId(v dyn.Value, p dyn.Path) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + computeIdPath := p.Append(dyn.Key("compute_id")) + computeId, err := dyn.GetByPath(v, computeIdPath) + + // If the "compute_id" key is not set, we don't need to do anything. + if err != nil { + return v, nil + } + + if computeId.Kind() == dyn.KindInvalid { + return v, nil + } + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: "compute_id is deprecated, please use cluster_id instead", + Locations: computeId.Locations(), + Paths: []dyn.Path{computeIdPath}, + }) + + clusterIdPath := p.Append(dyn.Key("cluster_id")) + nv, err := dyn.SetByPath(v, clusterIdPath, computeId) + if err != nil { + return dyn.InvalidValue, diag.FromErr(err) + } + // Drop the "compute_id" key. + vout, err := dyn.Walk(nv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + switch len(p) { + case 0: + return v, nil + case 1: + if p[0] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + return v, nil + case 2: + if p[1] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + } + return v, dyn.ErrSkip + }) + + diags = diags.Extend(diag.FromErr(err)) + return vout, diags +} diff --git a/bundle/config/mutator/compute_id_compate_test.go b/bundle/config/mutator/compute_id_compate_test.go new file mode 100644 index 000000000..e59d37e39 --- /dev/null +++ b/bundle/config/mutator/compute_id_compate_test.go @@ -0,0 +1,57 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/assert" +) + +func TestComputeIdToClusterId(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + ComputeId: "compute-id", + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "compute-id", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} + +func TestComputeIdToClusterIdInTargetOverride(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Targets: map[string]*config.Target{ + "dev": { + ComputeId: "compute-id-dev", + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Empty(t, b.Config.Targets["dev"].ComputeId) + + diags = diags.Extend(bundle.Apply(context.Background(), b, mutator.SelectTarget("dev"))) + assert.NoError(t, diags.Error()) + + assert.Equal(t, "compute-id-dev", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} diff --git a/bundle/config/mutator/expand_pipeline_glob_paths_test.go b/bundle/config/mutator/expand_pipeline_glob_paths_test.go index d1671c256..07dd20215 100644 --- a/bundle/config/mutator/expand_pipeline_glob_paths_test.go +++ b/bundle/config/mutator/expand_pipeline_glob_paths_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/pipelines" "github.com/stretchr/testify/require" @@ -105,8 +106,8 @@ func TestExpandGlobPathsInPipelines(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) - bundletest.SetLocation(b, "resources.pipelines.pipeline.libraries[3]", filepath.Join(dir, "relative", "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) + bundletest.SetLocation(b, "resources.pipelines.pipeline.libraries[3]", []dyn.Location{{File: filepath.Join(dir, "relative", "resource.yml")}}) m := ExpandPipelineGlobPaths() diags := bundle.Apply(context.Background(), b, m) diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 0458beff4..faf50ae6e 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -23,6 +23,7 @@ func DefaultMutators() []bundle.Mutator { VerifyCliVersion(), EnvironmentsToTargets(), + ComputeIdToClusterId(), InitializeVariables(), DefineDefaultTarget(), LoadGitDetails(), diff --git a/bundle/config/mutator/override_compute.go b/bundle/config/mutator/override_compute.go index 73fbad364..5700cdf26 100644 --- a/bundle/config/mutator/override_compute.go +++ b/bundle/config/mutator/override_compute.go @@ -39,22 +39,22 @@ func overrideJobCompute(j *resources.Job, compute string) { func (m *overrideCompute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { if b.Config.Bundle.Mode != config.Development { - if b.Config.Bundle.ComputeID != "" { + if b.Config.Bundle.ClusterId != "" { return diag.Errorf("cannot override compute for an target that does not use 'mode: development'") } return nil } if v := env.Get(ctx, "DATABRICKS_CLUSTER_ID"); v != "" { - b.Config.Bundle.ComputeID = v + b.Config.Bundle.ClusterId = v } - if b.Config.Bundle.ComputeID == "" { + if b.Config.Bundle.ClusterId == "" { return nil } r := b.Config.Resources for i := range r.Jobs { - overrideJobCompute(r.Jobs[i], b.Config.Bundle.ComputeID) + overrideJobCompute(r.Jobs[i], b.Config.Bundle.ClusterId) } return nil diff --git a/bundle/config/mutator/override_compute_test.go b/bundle/config/mutator/override_compute_test.go index 152ee543e..369447d7e 100644 --- a/bundle/config/mutator/override_compute_test.go +++ b/bundle/config/mutator/override_compute_test.go @@ -20,7 +20,7 @@ func TestOverrideDevelopment(t *testing.T) { Config: config.Root{ Bundle: config.Bundle{ Mode: config.Development, - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -144,7 +144,7 @@ func TestOverrideProduction(t *testing.T) { b := &bundle.Bundle{ Config: config.Root{ Bundle: config.Bundle{ - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/config/mutator/paths/job_paths_visitor.go b/bundle/config/mutator/paths/job_paths_visitor.go new file mode 100644 index 000000000..275a8fa53 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor.go @@ -0,0 +1,115 @@ +package paths + +import ( + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/libs/dyn" +) + +type jobRewritePattern struct { + pattern dyn.Pattern + kind PathKind + skipRewrite func(string) bool +} + +func noSkipRewrite(string) bool { + return false +} + +func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern { + return []jobRewritePattern{ + { + base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), + PathKindNotebook, + noSkipRewrite, + }, + { + base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), + PathKindDirectory, + noSkipRewrite, + }, + { + base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + } +} + +func jobRewritePatterns() []jobRewritePattern { + // Base pattern to match all tasks in all jobs. + base := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("tasks"), + dyn.AnyIndex(), + ) + + // Compile list of patterns and their respective rewrite functions. + jobEnvironmentsPatterns := []jobRewritePattern{ + { + dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("environments"), + dyn.AnyIndex(), + dyn.Key("spec"), + dyn.Key("dependencies"), + dyn.AnyIndex(), + ), + PathKindWithPrefix, + func(s string) bool { + return !libraries.IsLibraryLocal(s) + }, + }, + } + + taskPatterns := jobTaskRewritePatterns(base) + forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) + allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) + allPatterns = append(allPatterns, forEachPatterns...) + return allPatterns +} + +// VisitJobPaths visits all paths in job resources and applies a function to each path. +func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) { + var err error + var newValue = value + + for _, rewritePattern := range jobRewritePatterns() { + newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + if rewritePattern.skipRewrite(v.MustString()) { + return v, nil + } + + return fn(p, rewritePattern.kind, v) + }) + + if err != nil { + return dyn.InvalidValue, err + } + } + + return newValue, nil +} diff --git a/bundle/config/mutator/paths/job_paths_visitor_test.go b/bundle/config/mutator/paths/job_paths_visitor_test.go new file mode 100644 index 000000000..7f0201579 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor_test.go @@ -0,0 +1,168 @@ +package paths + +import ( + "testing" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func TestVisitJobPaths(t *testing.T) { + task0 := jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + } + task1 := jobs.Task{ + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "abc", + }, + } + task2 := jobs.Task{ + DbtTask: &jobs.DbtTask{ + ProjectDirectory: "abc", + }, + } + task3 := jobs.Task{ + SqlTask: &jobs.SqlTask{ + File: &jobs.SqlTaskFile{ + Path: "abc", + }, + }, + } + task4 := jobs.Task{ + Libraries: []compute.Library{ + {Whl: "dist/foo.whl"}, + }, + } + task5 := jobs.Task{ + Libraries: []compute.Library{ + {Jar: "dist/foo.jar"}, + }, + } + task6 := jobs.Task{ + Libraries: []compute.Library{ + {Requirements: "requirements.txt"}, + }, + } + + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + task0, + task1, + task2, + task3, + task4, + task5, + task6, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"), + dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"), + dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"), + dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"), + dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func TestVisitJobPaths_environments(t *testing.T) { + environment0 := jobs.JobEnvironment{ + Spec: &compute.Environment{ + Dependencies: []string{ + "dist_0/*.whl", + "dist_1/*.whl", + }, + }, + } + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Environments: []jobs.JobEnvironment{ + environment0, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"), + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func TestVisitJobPaths_foreach(t *testing.T) { + task0 := jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + }, + }, + } + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + task0, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func visitJobPaths(t *testing.T, root config.Root) []dyn.Path { + var actual []dyn.Path + err := root.Mutate(func(value dyn.Value) (dyn.Value, error) { + return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) { + actual = append(actual, p) + return v, nil + }) + }) + require.NoError(t, err) + return actual +} diff --git a/bundle/config/mutator/paths/visitor.go b/bundle/config/mutator/paths/visitor.go new file mode 100644 index 000000000..40d1f14ef --- /dev/null +++ b/bundle/config/mutator/paths/visitor.go @@ -0,0 +1,26 @@ +package paths + +import "github.com/databricks/cli/libs/dyn" + +type PathKind int + +const ( + // PathKindLibrary is a path to a library file + PathKindLibrary = iota + + // PathKindNotebook is a path to a notebook file + PathKindNotebook + + // PathKindWorkspaceFile is a path to a regular workspace file, + // notebooks are not allowed because they are uploaded a special + // kind of workspace object. + PathKindWorkspaceFile + + // PathKindWithPrefix is a path that starts with './' + PathKindWithPrefix + + // PathKindDirectory is a path to directory + PathKindDirectory +) + +type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error) diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index b5e0bd437..1e99b327c 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -33,7 +33,7 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) diag. } b.Config.Workspace.CurrentUser = &config.User{ - ShortName: auth.GetShortUserName(me.UserName), + ShortName: auth.GetShortUserName(me), User: me, } diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 42f1929c8..b0eb57ee1 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/libs/tags" sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/ml" @@ -119,6 +120,9 @@ func mockBundle(mode config.Mode) *bundle.Bundle { Schemas: map[string]*resources.Schema{ "schema1": {CreateSchema: &catalog.CreateSchema{Name: "schema1"}}, }, + Clusters: map[string]*resources.Cluster{ + "cluster1": {ClusterSpec: &compute.ClusterSpec{ClusterName: "cluster1", SparkVersion: "13.2.x", NumWorkers: 1}}, + }, }, }, // Use AWS implementation for testing. @@ -177,6 +181,9 @@ func TestProcessTargetModeDevelopment(t *testing.T) { // Schema 1 assert.Equal(t, "dev_lennart_schema1", b.Config.Resources.Schemas["schema1"].Name) + + // Clusters + assert.Equal(t, "[dev lennart] cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeDevelopmentTagNormalizationForAws(t *testing.T) { @@ -281,6 +288,7 @@ func TestProcessTargetModeDefault(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProduction(t *testing.T) { @@ -312,6 +320,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Resources.Experiments["experiment2"].Permissions = permissions b.Config.Resources.Models["model1"].Permissions = permissions b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Permissions = permissions + b.Config.Resources.Clusters["cluster1"].Permissions = permissions diags = validateProductionMode(context.Background(), b, false) require.NoError(t, diags.Error()) @@ -322,6 +331,7 @@ func TestProcessTargetModeProduction(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { diff --git a/bundle/config/mutator/rewrite_sync_paths_test.go b/bundle/config/mutator/rewrite_sync_paths_test.go index fa7f124b7..a66f2763a 100644 --- a/bundle/config/mutator/rewrite_sync_paths_test.go +++ b/bundle/config/mutator/rewrite_sync_paths_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" ) @@ -33,12 +34,12 @@ func TestRewriteSyncPathsRelative(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml") - bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml") - bundletest.SetLocation(b, "sync.include[0]", "./file.yml") - bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml") - bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml") - bundletest.SetLocation(b, "sync.exclude[1]", "./a/b/c/file.yml") + bundletest.SetLocation(b, "sync.paths[0]", []dyn.Location{{File: "./databricks.yml"}}) + bundletest.SetLocation(b, "sync.paths[1]", []dyn.Location{{File: "./databricks.yml"}}) + bundletest.SetLocation(b, "sync.include[0]", []dyn.Location{{File: "./file.yml"}}) + bundletest.SetLocation(b, "sync.include[1]", []dyn.Location{{File: "./a/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[0]", []dyn.Location{{File: "./a/b/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[1]", []dyn.Location{{File: "./a/b/c/file.yml"}}) diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) @@ -72,12 +73,12 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml") - bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml") - bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml") - bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml") - bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml") - bundletest.SetLocation(b, "sync.exclude[1]", "/tmp/dir/a/b/c/file.yml") + bundletest.SetLocation(b, "sync.paths[0]", []dyn.Location{{File: "/tmp/dir/databricks.yml"}}) + bundletest.SetLocation(b, "sync.paths[1]", []dyn.Location{{File: "/tmp/dir/databricks.yml"}}) + bundletest.SetLocation(b, "sync.include[0]", []dyn.Location{{File: "/tmp/dir/file.yml"}}) + bundletest.SetLocation(b, "sync.include[1]", []dyn.Location{{File: "/tmp/dir/a/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[0]", []dyn.Location{{File: "/tmp/dir/a/b/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[1]", []dyn.Location{{File: "/tmp/dir/a/b/c/file.yml"}}) diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index e6cef9ba4..abeea45d0 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -32,6 +32,7 @@ func allResourceTypes(t *testing.T) []string { // the dyn library gives us the correct list of all resources supported. Please // also update this check when adding a new resource require.Equal(t, []string{ + "clusters", "experiments", "jobs", "model_serving_endpoints", @@ -133,6 +134,7 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { // some point in the future. These resources are (implicitly) on the deny list, since // they are not on the allow list below. allowList := []string{ + "clusters", "jobs", "models", "registered_models", diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go index 383e56769..85e40adc6 100644 --- a/bundle/config/mutator/sync_infer_root_test.go +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -184,7 +185,7 @@ func TestSyncInferRoot_Error(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths", "databricks.yml") + bundletest.SetLocation(b, "sync.paths", []dyn.Location{{File: "databricks.yml"}}) ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index e34eeb2f0..c29ff0ea9 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -4,97 +4,11 @@ import ( "fmt" "slices" - "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/cli/libs/dyn" ) -type jobRewritePattern struct { - pattern dyn.Pattern - fn rewriteFunc - skipRewrite func(string) bool -} - -func noSkipRewrite(string) bool { - return false -} - -func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern { - return []jobRewritePattern{ - { - base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), - t.translateNotebookPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), - t.translateDirectoryPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), - t.translateFilePath, - noSkipRewrite, - }, - } -} - -func (t *translateContext) jobRewritePatterns() []jobRewritePattern { - // Base pattern to match all tasks in all jobs. - base := dyn.NewPattern( - dyn.Key("resources"), - dyn.Key("jobs"), - dyn.AnyKey(), - dyn.Key("tasks"), - dyn.AnyIndex(), - ) - - // Compile list of patterns and their respective rewrite functions. - jobEnvironmentsPatterns := []jobRewritePattern{ - { - dyn.NewPattern( - dyn.Key("resources"), - dyn.Key("jobs"), - dyn.AnyKey(), - dyn.Key("environments"), - dyn.AnyIndex(), - dyn.Key("spec"), - dyn.Key("dependencies"), - dyn.AnyIndex(), - ), - t.translateNoOpWithPrefix, - func(s string) bool { - return !libraries.IsLibraryLocal(s) - }, - }, - } - - taskPatterns := rewritePatterns(t, base) - forEachPatterns := rewritePatterns(t, base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) - allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) - allPatterns = append(allPatterns, forEachPatterns...) - return allPatterns -} - func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { var err error @@ -111,30 +25,41 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) } } - for _, rewritePattern := range t.jobRewritePatterns() { - v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - key := p[2].Key() + return paths.VisitJobPaths(v, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + key := p[2].Key() - // Skip path translation if the job is using git source. - if slices.Contains(ignore, key) { - return v, nil - } + // Skip path translation if the job is using git source. + if slices.Contains(ignore, key) { + return v, nil + } - dir, err := v.Location().Directory() - if err != nil { - return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) - } + dir, err := v.Location().Directory() + if err != nil { + return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) + } - sv := v.MustString() - if rewritePattern.skipRewrite(sv) { - return v, nil - } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) - }) + rewritePatternFn, err := t.getRewritePatternFn(kind) if err != nil { return dyn.InvalidValue, err } + + return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + }) +} + +func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) { + switch kind { + case paths.PathKindLibrary: + return t.translateNoOp, nil + case paths.PathKindNotebook: + return t.translateNotebookPath, nil + case paths.PathKindWorkspaceFile: + return t.translateFilePath, nil + case paths.PathKindDirectory: + return t.translateDirectoryPath, nil + case paths.PathKindWithPrefix: + return t.translateNoOpWithPrefix, nil } - return v, nil + return nil, fmt.Errorf("unsupported path kind: %d", kind) } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 50fcd3b07..c03cee73e 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -82,7 +82,7 @@ func TestTranslatePathsSkippedWithGitSource(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -210,7 +210,7 @@ func TestTranslatePaths(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -346,8 +346,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) - bundletest.SetLocation(b, "resources.pipelines", filepath.Join(dir, "pipeline/resource.yml")) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) + bundletest.SetLocation(b, "resources.pipelines", []dyn.Location{{File: filepath.Join(dir, "pipeline/resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -408,7 +408,7 @@ func TestTranslatePathsOutsideSyncRoot(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "../resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "../resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), "is not contained in sync root path") @@ -439,7 +439,7 @@ func TestJobNotebookDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found") @@ -470,7 +470,7 @@ func TestJobFileDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found") @@ -501,7 +501,7 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found") @@ -532,7 +532,7 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found") @@ -567,7 +567,7 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a file for "resources.jobs.job.tasks[0].spark_python_task.python_file" but got a notebook`) @@ -602,7 +602,7 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.jobs.job.tasks[0].notebook_task.notebook_path" but got a file`) @@ -637,7 +637,7 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.pipelines.pipeline.libraries[0].notebook.path" but got a file`) @@ -672,7 +672,7 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a file for "resources.pipelines.pipeline.libraries[0].file.path" but got a notebook`) @@ -710,7 +710,7 @@ func TestTranslatePathJobEnvironments(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -753,8 +753,8 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { }, } - bundletest.SetLocation(b, "variables", filepath.Join(dir, "variables/variables.yml")) - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) + bundletest.SetLocation(b, "variables", []dyn.Location{{File: filepath.Join(dir, "variables/variables.yml")}}) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) ctx := context.Background() // Assign the variables to the dynamic configuration. diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 22d69ffb5..a3afb7fc3 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -19,6 +19,7 @@ type Resources struct { RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"` QualityMonitors map[string]*resources.QualityMonitor `json:"quality_monitors,omitempty"` Schemas map[string]*resources.Schema `json:"schemas,omitempty"` + Clusters map[string]*resources.Cluster `json:"clusters,omitempty"` } type ConfigResource interface { diff --git a/bundle/config/resources/clusters.go b/bundle/config/resources/clusters.go new file mode 100644 index 000000000..632345666 --- /dev/null +++ b/bundle/config/resources/clusters.go @@ -0,0 +1,39 @@ +package resources + +import ( + "context" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +type Cluster struct { + ID string `json:"id,omitempty" bundle:"readonly"` + Permissions []Permission `json:"permissions,omitempty"` + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` + + *compute.ClusterSpec +} + +func (s *Cluster) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, s) +} + +func (s Cluster) MarshalJSON() ([]byte, error) { + return marshal.Marshal(s) +} + +func (s *Cluster) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.Clusters.GetByClusterId(ctx, id) + if err != nil { + log.Debugf(ctx, "cluster %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *Cluster) TerraformResourceName() string { + return "databricks_cluster" +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 46578769c..ff169e4ce 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -366,9 +366,9 @@ func (r *Root) MergeTargetOverrides(name string) error { } } - // Merge `compute_id`. This field must be overwritten if set, not merged. - if v := target.Get("compute_id"); v.Kind() != dyn.KindInvalid { - root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("compute_id")), v) + // Merge `cluster_id`. This field must be overwritten if set, not merged. + if v := target.Get("cluster_id"); v.Kind() != dyn.KindInvalid { + root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("cluster_id")), v) if err != nil { return err } @@ -409,15 +409,51 @@ func (r *Root) MergeTargetOverrides(name string) error { var variableKeywords = []string{"default", "lookup"} // isFullVariableOverrideDef checks if the given value is a full syntax varaible override. -// A full syntax variable override is a map with only one of the following -// keys: "default", "lookup". +// A full syntax variable override is a map with either 1 of 2 keys. +// If it's 2 keys, the keys should be "default" and "type". +// If it's 1 key, the key should be one of the following keys: "default", "lookup". func isFullVariableOverrideDef(v dyn.Value) bool { mv, ok := v.AsMap() if !ok { return false } - if mv.Len() != 1 { + // If the map has more than 3 keys, it is not a full variable override. + if mv.Len() > 3 { + return false + } + + // If the map has 3 keys, they should be "description", "type" and "default" or "lookup" + if mv.Len() == 3 { + if _, ok := mv.GetByString("type"); ok { + if _, ok := mv.GetByString("description"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } + } + } + + return false + } + + // If the map has 2 keys, one of them should be "default" or "lookup" and the other is "type" or "description" + if mv.Len() == 2 { + if _, ok := mv.GetByString("type"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } + } + + if _, ok := mv.GetByString("description"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } + + if _, ok := mv.GetByString("lookup"); ok { + return true + } + } + return false } diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index c95e6e86c..9e6123534 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -139,7 +140,7 @@ func TestRootMergeTargetOverridesWithVariables(t *testing.T) { }, Targets: map[string]*Target{ "development": { - Variables: map[string]*variable.Variable{ + Variables: map[string]*variable.TargetVariable{ "foo": { Default: "bar", Description: "wrong", @@ -169,3 +170,87 @@ func TestRootMergeTargetOverridesWithVariables(t *testing.T) { assert.Equal(t, "complex var", root.Variables["complex"].Description) } + +func TestIsFullVariableOverrideDef(t *testing.T) { + testCases := []struct { + value dyn.Value + expected bool + }{ + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + "description": dyn.V("foo var"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "lookup": dyn.V("foo"), + "description": dyn.V("foo var"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "lookup": dyn.V("foo"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "description": dyn.V("string"), + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "description": dyn.V("string"), + "lookup": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "lookup": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + "description": dyn.V("foo var"), + "lookup": dyn.V("foo"), + }), + expected: false, + }, + } + + for i, tc := range testCases { + assert.Equal(t, tc.expected, isFullVariableOverrideDef(tc.value), "test case %d", i) + } + +} diff --git a/bundle/config/target.go b/bundle/config/target.go index a2ef4d735..fae9c940b 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -24,8 +24,11 @@ type Target struct { // name prefix of deployed resources. Presets Presets `json:"presets,omitempty"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. + ClusterId string `json:"cluster_id,omitempty"` Bundle *Bundle `json:"bundle,omitempty"` @@ -38,7 +41,26 @@ type Target struct { // Override default values or lookup name for defined variables // Does not permit defining new variables or redefining existing ones // in the scope of an target - Variables map[string]*variable.Variable `json:"variables,omitempty"` + // + // There are two valid ways to define a variable override in a target: + // 1. Direct value override. We normalize this to the variable.Variable + // struct format when loading the configuration YAML: + // + // variables: + // foo: "value" + // + // 2. Override matching the variable.Variable struct. + // + // variables: + // foo: + // default: "value" + // + // OR + // + // variables: + // foo: + // lookup: "resource_name" + Variables map[string]*variable.TargetVariable `json:"variables,omitempty"` Git Git `json:"git,omitempty"` diff --git a/bundle/config/validate/job_task_cluster_spec.go b/bundle/config/validate/job_task_cluster_spec.go new file mode 100644 index 000000000..b80befcdf --- /dev/null +++ b/bundle/config/validate/job_task_cluster_spec.go @@ -0,0 +1,161 @@ +package validate + +import ( + "context" + "fmt" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +// JobTaskClusterSpec validates that job tasks have cluster spec defined +// if task requires a cluster +func JobTaskClusterSpec() bundle.ReadOnlyMutator { + return &jobTaskClusterSpec{} +} + +type jobTaskClusterSpec struct { +} + +func (v *jobTaskClusterSpec) Name() string { + return "validate:job_task_cluster_spec" +} + +func (v *jobTaskClusterSpec) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs")) + + for resourceName, job := range rb.Config().Resources.Jobs { + resourcePath := jobsPath.Append(dyn.Key(resourceName)) + + for taskIndex, task := range job.Tasks { + taskPath := resourcePath.Append(dyn.Key("tasks"), dyn.Index(taskIndex)) + + diags = diags.Extend(validateJobTask(rb, task, taskPath)) + } + } + + return diags +} + +func validateJobTask(rb bundle.ReadOnlyBundle, task jobs.Task, taskPath dyn.Path) diag.Diagnostics { + diags := diag.Diagnostics{} + + var specified []string + var unspecified []string + + if task.JobClusterKey != "" { + specified = append(specified, "job_cluster_key") + } else { + unspecified = append(unspecified, "job_cluster_key") + } + + if task.EnvironmentKey != "" { + specified = append(specified, "environment_key") + } else { + unspecified = append(unspecified, "environment_key") + } + + if task.ExistingClusterId != "" { + specified = append(specified, "existing_cluster_id") + } else { + unspecified = append(unspecified, "existing_cluster_id") + } + + if task.NewCluster != nil { + specified = append(specified, "new_cluster") + } else { + unspecified = append(unspecified, "new_cluster") + } + + if task.ForEachTask != nil { + forEachTaskPath := taskPath.Append(dyn.Key("for_each_task"), dyn.Key("task")) + + diags = diags.Extend(validateJobTask(rb, task.ForEachTask.Task, forEachTaskPath)) + } + + if isComputeTask(task) && len(specified) == 0 { + if task.NotebookTask != nil { + // notebook tasks without cluster spec will use notebook environment + } else { + // path might be not very helpful, adding user-specified task key clarifies the context + detail := fmt.Sprintf( + "Task %q requires a cluster or an environment to run.\nSpecify one of the following fields: %s.", + task.TaskKey, + strings.Join(unspecified, ", "), + ) + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: "Missing required cluster or environment settings", + Detail: detail, + Locations: rb.Config().GetLocations(taskPath.String()), + Paths: []dyn.Path{taskPath}, + }) + } + } + + return diags +} + +// isComputeTask returns true if the task runs on a cluster or serverless GC +func isComputeTask(task jobs.Task) bool { + if task.NotebookTask != nil { + // if warehouse_id is set, it's SQL notebook that doesn't need cluster or serverless GC + if task.NotebookTask.WarehouseId != "" { + return false + } else { + // task settings don't require specifying a cluster/serverless GC, but task itself can run on one + // we handle that case separately in validateJobTask + return true + } + } + + if task.PythonWheelTask != nil { + return true + } + + if task.DbtTask != nil { + return true + } + + if task.SparkJarTask != nil { + return true + } + + if task.SparkSubmitTask != nil { + return true + } + + if task.SparkPythonTask != nil { + return true + } + + if task.SqlTask != nil { + return false + } + + if task.PipelineTask != nil { + // while pipelines use clusters, pipeline tasks don't, they only trigger pipelines + return false + } + + if task.RunJobTask != nil { + return false + } + + if task.ConditionTask != nil { + return false + } + + // for each task doesn't use clusters, underlying task(s) can though + if task.ForEachTask != nil { + return false + } + + return false +} diff --git a/bundle/config/validate/job_task_cluster_spec_test.go b/bundle/config/validate/job_task_cluster_spec_test.go new file mode 100644 index 000000000..a3a7ccf25 --- /dev/null +++ b/bundle/config/validate/job_task_cluster_spec_test.go @@ -0,0 +1,203 @@ +package validate + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" +) + +func TestJobTaskClusterSpec(t *testing.T) { + expectedSummary := "Missing required cluster or environment settings" + + type testCase struct { + name string + task jobs.Task + errorPath string + errorDetail string + errorSummary string + } + + testCases := []testCase{ + { + name: "valid notebook task", + task: jobs.Task{ + // while a cluster is needed, it will use notebook environment to create one + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (job_cluster_key)", + task: jobs.Task{ + JobClusterKey: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (new_cluster)", + task: jobs.Task{ + NewCluster: &compute.ClusterSpec{}, + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (existing_cluster_id)", + task: jobs.Task{ + ExistingClusterId: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid SQL notebook task", + task: jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + WarehouseId: "warehouse1", + }, + }, + }, + { + name: "valid python wheel task", + task: jobs.Task{ + JobClusterKey: "cluster1", + PythonWheelTask: &jobs.PythonWheelTask{}, + }, + }, + { + name: "valid python wheel task (environment_key)", + task: jobs.Task{ + EnvironmentKey: "environment1", + PythonWheelTask: &jobs.PythonWheelTask{}, + }, + }, + { + name: "valid dbt task", + task: jobs.Task{ + JobClusterKey: "cluster1", + DbtTask: &jobs.DbtTask{}, + }, + }, + { + name: "valid spark jar task", + task: jobs.Task{ + JobClusterKey: "cluster1", + SparkJarTask: &jobs.SparkJarTask{}, + }, + }, + { + name: "valid spark submit", + task: jobs.Task{ + NewCluster: &compute.ClusterSpec{}, + SparkSubmitTask: &jobs.SparkSubmitTask{}, + }, + }, + { + name: "valid spark python task", + task: jobs.Task{ + JobClusterKey: "cluster1", + SparkPythonTask: &jobs.SparkPythonTask{}, + }, + }, + { + name: "valid SQL task", + task: jobs.Task{ + SqlTask: &jobs.SqlTask{}, + }, + }, + { + name: "valid pipeline task", + task: jobs.Task{ + PipelineTask: &jobs.PipelineTask{}, + }, + }, + { + name: "valid run job task", + task: jobs.Task{ + RunJobTask: &jobs.RunJobTask{}, + }, + }, + { + name: "valid condition task", + task: jobs.Task{ + ConditionTask: &jobs.ConditionTask{}, + }, + }, + { + name: "valid for each task", + task: jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + JobClusterKey: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + }, + }, + { + name: "invalid python wheel task", + task: jobs.Task{ + PythonWheelTask: &jobs.PythonWheelTask{}, + TaskKey: "my_task", + }, + errorPath: "resources.jobs.job1.tasks[0]", + errorDetail: `Task "my_task" requires a cluster or an environment to run. +Specify one of the following fields: job_cluster_key, environment_key, existing_cluster_id, new_cluster.`, + errorSummary: expectedSummary, + }, + { + name: "invalid for each task", + task: jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + PythonWheelTask: &jobs.PythonWheelTask{}, + TaskKey: "my_task", + }, + }, + }, + errorPath: "resources.jobs.job1.tasks[0].for_each_task.task", + errorDetail: `Task "my_task" requires a cluster or an environment to run. +Specify one of the following fields: job_cluster_key, environment_key, existing_cluster_id, new_cluster.`, + errorSummary: expectedSummary, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + job := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{tc.task}, + }, + } + + b := createBundle(map[string]*resources.Job{"job1": job}) + diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobTaskClusterSpec()) + + if tc.errorPath != "" || tc.errorDetail != "" || tc.errorSummary != "" { + assert.Len(t, diags, 1) + assert.Len(t, diags[0].Paths, 1) + + diag := diags[0] + + assert.Equal(t, tc.errorPath, diag.Paths[0].String()) + assert.Equal(t, tc.errorSummary, diag.Summary) + assert.Equal(t, tc.errorDetail, diag.Detail) + } else { + assert.ElementsMatch(t, []string{}, diags) + } + }) + } +} + +func createBundle(jobs map[string]*resources.Job) *bundle.Bundle { + return &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: jobs, + }, + }, + } +} diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index b4da0bc05..79f42bd23 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -34,6 +34,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics JobClusterKeyDefined(), FilesToSync(), ValidateSyncPatterns(), + JobTaskClusterSpec(), )) } diff --git a/bundle/config/variable/variable.go b/bundle/config/variable/variable.go index ba94f9c8a..2362ad10d 100644 --- a/bundle/config/variable/variable.go +++ b/bundle/config/variable/variable.go @@ -16,6 +16,11 @@ const ( VariableTypeComplex VariableType = "complex" ) +// We alias it here to override the JSON schema associated with a variable value +// in a target override. This is because we allow for directly specifying the value +// in addition to the variable.Variable struct format in a target override. +type TargetVariable Variable + // An input variable for the bundle config type Variable struct { // A type of the variable. This is used to validate the value of the variable diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index 2c126623e..77b83611b 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -8,9 +8,12 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/sync" ) -type upload struct{} +type upload struct { + outputHandler sync.OutputHandler +} func (m *upload) Name() string { return "files.Upload" @@ -18,11 +21,18 @@ func (m *upload) Name() string { func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath)) - sync, err := GetSync(ctx, bundle.ReadOnly(b)) + opts, err := GetSyncOptions(ctx, bundle.ReadOnly(b)) if err != nil { return diag.FromErr(err) } + opts.OutputHandler = m.outputHandler + sync, err := sync.New(ctx, *opts) + if err != nil { + return diag.FromErr(err) + } + defer sync.Close() + b.Files, err = sync.RunOnce(ctx) if err != nil { return diag.FromErr(err) @@ -32,6 +42,6 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return nil } -func Upload() bundle.Mutator { - return &upload{} +func Upload(outputHandler sync.OutputHandler) bundle.Mutator { + return &upload{outputHandler} } diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index 6d43f845b..2c2c72376 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -55,9 +56,9 @@ func TestComputeMetadataMutator(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs.my-job-1", "a/b/c") - bundletest.SetLocation(b, "resources.jobs.my-job-2", "d/e/f") - bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "abc") + bundletest.SetLocation(b, "resources.jobs.my-job-1", []dyn.Location{{File: "a/b/c"}}) + bundletest.SetLocation(b, "resources.jobs.my-job-2", []dyn.Location{{File: "d/e/f"}}) + bundletest.SetLocation(b, "resources.pipelines.my-pipeline", []dyn.Location{{File: "abc"}}) expectedMetadata := metadata.Metadata{ Version: metadata.Version, diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index f13c241ce..5a548e3b5 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -231,6 +231,13 @@ func BundleToTerraform(config *config.Root) *schema.Root { tfroot.Resource.QualityMonitor[k] = &dst } + for k, src := range config.Resources.Clusters { + noResources = false + var dst schema.ResourceCluster + conv(src, &dst) + tfroot.Resource.Cluster[k] = &dst + } + // We explicitly set "resource" to nil to omit it from a JSON encoding. // This is required because the terraform CLI requires >= 1 resources defined // if the "resource" property is used in a .tf.json file. @@ -394,6 +401,16 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.Schemas[resource.Name] = cur + case "databricks_cluster": + if config.Resources.Clusters == nil { + config.Resources.Clusters = make(map[string]*resources.Cluster) + } + cur := config.Resources.Clusters[resource.Name] + if cur == nil { + cur = &resources.Cluster{ModifiedStatus: resources.ModifiedStatusDeleted} + } + cur.ID = instance.Attributes.ID + config.Resources.Clusters[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. @@ -443,6 +460,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Clusters { + if src.ModifiedStatus == "" && src.ID == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index e4ef6114a..4c6866d9d 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -663,6 +663,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -692,6 +700,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -754,6 +765,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + }, }, } var tfState = resourcesState{ @@ -786,6 +804,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -888,6 +909,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + "test_cluster_new": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster_new", + }, + }, + }, }, } var tfState = resourcesState{ @@ -1020,6 +1053,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "2"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "2"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1081,6 +1130,13 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema_new"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ModifiedStatus) + assert.Equal(t, "2", config.Resources.Clusters["test_cluster_old"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster_old"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster_new"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index faa098e1c..12894c684 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -58,6 +58,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_quality_monitor")).Append(path[2:]...) case dyn.Key("schemas"): path = dyn.NewPath(dyn.Key("databricks_schema")).Append(path[2:]...) + case dyn.Key("clusters"): + path = dyn.NewPath(dyn.Key("databricks_cluster")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index 5ceb243bc..630a904ac 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -31,6 +31,7 @@ func TestInterpolate(t *testing.T) { "other_model_serving": "${resources.model_serving_endpoints.other_model_serving.id}", "other_registered_model": "${resources.registered_models.other_registered_model.id}", "other_schema": "${resources.schemas.other_schema.id}", + "other_cluster": "${resources.clusters.other_cluster.id}", }, Tasks: []jobs.Task{ { @@ -67,6 +68,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_model_serving.other_model_serving.id}", j.Tags["other_model_serving"]) assert.Equal(t, "${databricks_registered_model.other_registered_model.id}", j.Tags["other_registered_model"]) assert.Equal(t, "${databricks_schema.other_schema.id}", j.Tags["other_schema"]) + assert.Equal(t, "${databricks_cluster.other_cluster.id}", j.Tags["other_cluster"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster.go b/bundle/deploy/terraform/tfdyn/convert_cluster.go new file mode 100644 index 000000000..f25f09ea8 --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster.go @@ -0,0 +1,52 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +func convertClusterResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Normalize the output value to the target schema. + vout, diags := convert.Normalize(compute.ClusterSpec{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "cluster normalization diagnostic: %s", diag.Summary) + } + + return vout, nil +} + +type clusterConverter struct{} + +func (clusterConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertClusterResource(ctx, vin) + if err != nil { + return err + } + + // We always set no_wait as it allows DABs not to wait for cluster to be started. + vout, err = dyn.Set(vout, "no_wait", dyn.V(true)) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.Cluster[key] = vout.AsAny() + + // Configure permissions for this resource. + if permissions := convertPermissionsResource(ctx, vin); permissions != nil { + permissions.JobId = fmt.Sprintf("${databricks_cluster.%s.id}", key) + out.Permissions["cluster_"+key] = permissions + } + + return nil +} + +func init() { + registerConverter("clusters", clusterConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster_test.go b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go new file mode 100644 index 000000000..e7d2542fd --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go @@ -0,0 +1,97 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertCluster(t *testing.T) { + var src = resources.Cluster{ + ClusterSpec: &compute.ClusterSpec{ + NumWorkers: 3, + SparkVersion: "13.3.x-scala2.12", + ClusterName: "cluster", + SparkConf: map[string]string{ + "spark.executor.memory": "2g", + }, + AwsAttributes: &compute.AwsAttributes{ + Availability: "ON_DEMAND", + }, + AzureAttributes: &compute.AzureAttributes{ + Availability: "SPOT", + }, + DataSecurityMode: "USER_ISOLATION", + NodeTypeId: "m5.xlarge", + Autoscale: &compute.AutoScale{ + MinWorkers: 1, + MaxWorkers: 10, + }, + }, + + Permissions: []resources.Permission{ + { + Level: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + Level: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = clusterConverter{}.Convert(ctx, "my_cluster", vin, out) + require.NoError(t, err) + + cluster := out.Cluster["my_cluster"] + assert.Equal(t, map[string]any{ + "num_workers": int64(3), + "spark_version": "13.3.x-scala2.12", + "cluster_name": "cluster", + "spark_conf": map[string]any{ + "spark.executor.memory": "2g", + }, + "aws_attributes": map[string]any{ + "availability": "ON_DEMAND", + }, + "azure_attributes": map[string]any{ + "availability": "SPOT", + }, + "data_security_mode": "USER_ISOLATION", + "no_wait": true, + "node_type_id": "m5.xlarge", + "autoscale": map[string]any{ + "min_workers": int64(1), + "max_workers": int64(10), + }, + }, cluster) + + // Assert equality on the permissions + assert.Equal(t, &schema.ResourcePermissions{ + JobId: "${databricks_cluster.my_cluster.id}", + AccessControl: []schema.ResourcePermissionsAccessControl{ + { + PermissionLevel: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + PermissionLevel: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + }, out.Permissions["cluster_my_cluster"]) + +} diff --git a/bundle/internal/bundle/schema/main.go b/bundle/internal/bundle/schema/main.go deleted file mode 100644 index c9cc7cd4f..000000000 --- a/bundle/internal/bundle/schema/main.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - "os" - - "github.com/databricks/cli/bundle/schema" -) - -func main() { - if len(os.Args) != 2 { - fmt.Println("Usage: go run main.go ") - os.Exit(1) - } - - // Output file, to write the generated schema descriptions to. - outputFile := os.Args[1] - - // Input file, the databricks openapi spec. - inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC") - if inputFile == "" { - log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set") - } - - // Generate the schema descriptions. - docs, err := schema.UpdateBundleDescriptions(inputFile) - if err != nil { - log.Fatal(err) - } - result, err := json.MarshalIndent(docs, "", " ") - if err != nil { - log.Fatal(err) - } - - // Write the schema descriptions to the output file. - err = os.WriteFile(outputFile, result, 0644) - if err != nil { - log.Fatal(err) - } -} diff --git a/bundle/internal/bundletest/location.go b/bundle/internal/bundletest/location.go index 380d6e17d..2ffd621bf 100644 --- a/bundle/internal/bundletest/location.go +++ b/bundle/internal/bundletest/location.go @@ -8,15 +8,13 @@ import ( // SetLocation sets the location of all values in the bundle to the given path. // This is useful for testing where we need to associate configuration // with the path it is loaded from. -func SetLocation(b *bundle.Bundle, prefix string, filePath string) { +func SetLocation(b *bundle.Bundle, prefix string, locations []dyn.Location) { start := dyn.MustPathFromString(prefix) b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { // If the path has the given prefix, set the location. if p.HasPrefix(start) { - return v.WithLocations([]dyn.Location{{ - File: filePath, - }}), nil + return v.WithLocations(locations), nil } // The path is not nested under the given prefix. diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go new file mode 100644 index 000000000..4a2371472 --- /dev/null +++ b/bundle/internal/schema/main.go @@ -0,0 +1,109 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "os" + "reflect" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/cli/libs/jsonschema" +) + +func interpolationPattern(s string) string { + return fmt.Sprintf(`\$\{(%s(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)+)\}`, s) +} + +func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + if typ == reflect.TypeOf(config.Root{}) || typ == reflect.TypeOf(variable.Variable{}) { + return s + } + + // The variables block in a target override allows for directly specifying + // the value of the variable. + if typ == reflect.TypeOf(variable.TargetVariable{}) { + return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + // We keep the original schema so that autocomplete suggestions + // continue to work. + s, + // All values are valid for a variable value, be it primitive types + // like string/bool or complex ones like objects/arrays. Thus we override + // the schema to allow all valid JSON values. + {}, + }, + } + } + + switch s.Type { + case jsonschema.ArrayType, jsonschema.ObjectType: + // arrays and objects can have complex variable values specified. + return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + s, + { + Type: jsonschema.StringType, + Pattern: interpolationPattern("var"), + }}, + } + case jsonschema.IntegerType, jsonschema.NumberType, jsonschema.BooleanType: + // primitives can have variable values, or references like ${bundle.xyz} + // or ${workspace.xyz} + return jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ + s, + {Type: jsonschema.StringType, Pattern: interpolationPattern("resources")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("bundle")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("workspace")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("artifacts")}, + {Type: jsonschema.StringType, Pattern: interpolationPattern("var")}, + }, + } + default: + return s + } +} + +func main() { + if len(os.Args) != 2 { + fmt.Println("Usage: go run main.go ") + os.Exit(1) + } + + // Output file, where the generated JSON schema will be written to. + outputFile := os.Args[1] + + // Input file, the databricks openapi spec. + inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC") + if inputFile == "" { + log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set") + } + + p, err := newParser(inputFile) + if err != nil { + log.Fatal(err) + } + + // Generate the JSON schema from the bundle Go struct. + s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + p.addDescriptions, + p.addEnums, + addInterpolationPatterns, + }) + if err != nil { + log.Fatal(err) + } + + b, err := json.MarshalIndent(s, "", " ") + if err != nil { + log.Fatal(err) + } + + // Write the schema descriptions to the output file. + err = os.WriteFile(outputFile, b, 0644) + if err != nil { + log.Fatal(err) + } +} diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go new file mode 100644 index 000000000..ef3d6e719 --- /dev/null +++ b/bundle/internal/schema/parser.go @@ -0,0 +1,123 @@ +package main + +import ( + "encoding/json" + "fmt" + "os" + "path" + "reflect" + "strings" + + "github.com/databricks/cli/libs/jsonschema" +) + +type Components struct { + Schemas map[string]jsonschema.Schema `json:"schemas,omitempty"` +} + +type Specification struct { + Components Components `json:"components"` +} + +type openapiParser struct { + ref map[string]jsonschema.Schema +} + +func newParser(path string) (*openapiParser, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + spec := Specification{} + err = json.Unmarshal(b, &spec) + if err != nil { + return nil, err + } + + p := &openapiParser{} + p.ref = spec.Components.Schemas + return p, nil +} + +// This function checks if the input type: +// 1. Is a Databricks Go SDK type. +// 2. Has a Databricks Go SDK type embedded in it. +// +// If the above conditions are met, the function returns the JSON schema +// corresponding to the Databricks Go SDK type from the OpenAPI spec. +func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { + typs := []reflect.Type{typ} + + // Check for embedded Databricks Go SDK types. + if typ.Kind() == reflect.Struct { + for i := 0; i < typ.NumField(); i++ { + if !typ.Field(i).Anonymous { + continue + } + + // Deference current type if it's a pointer. + ctyp := typ.Field(i).Type + for ctyp.Kind() == reflect.Ptr { + ctyp = ctyp.Elem() + } + + typs = append(typs, ctyp) + } + } + + for _, ctyp := range typs { + // Skip if it's not a Go SDK type. + if !strings.HasPrefix(ctyp.PkgPath(), "github.com/databricks/databricks-sdk-go") { + continue + } + + pkgName := path.Base(ctyp.PkgPath()) + k := fmt.Sprintf("%s.%s", pkgName, ctyp.Name()) + + // Skip if the type is not in the openapi spec. + _, ok := p.ref[k] + if !ok { + continue + } + + // Return the first Go SDK type found in the openapi spec. + return p.ref[k], true + } + + return jsonschema.Schema{}, false +} + +// Use the OpenAPI spec to load descriptions for the given type. +func (p *openapiParser) addDescriptions(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + ref, ok := p.findRef(typ) + if !ok { + return s + } + + s.Description = ref.Description + for k, v := range s.Properties { + if refProp, ok := ref.Properties[k]; ok { + v.Description = refProp.Description + } + } + + return s +} + +// Use the OpenAPI spec add enum values for the given type. +func (p *openapiParser) addEnums(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + ref, ok := p.findRef(typ) + if !ok { + return s + } + + s.Enum = append(s.Enum, ref.Enum...) + for k, v := range s.Properties { + if refProp, ok := ref.Properties[k]; ok { + v.Enum = append(v.Enum, refProp.Enum...) + } + } + + return s +} diff --git a/bundle/internal/schema/testdata/fail/basic.yml b/bundle/internal/schema/testdata/fail/basic.yml new file mode 100644 index 000000000..5ab981e3c --- /dev/null +++ b/bundle/internal/schema/testdata/fail/basic.yml @@ -0,0 +1,3 @@ +bundle: + # expected type is 'string' + name: 1234 diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml new file mode 100644 index 000000000..92b1e9fcf --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml @@ -0,0 +1,4 @@ +resources: + jobs: + myjob: + format: INVALID_VALUE diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml new file mode 100644 index 000000000..278b238f4 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml @@ -0,0 +1,6 @@ +resources: + models: + mymodel: + latest_versions: + - creation_timestamp: 123 + status: INVALID_VALUE diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml new file mode 100644 index 000000000..2e0e2d84f --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml @@ -0,0 +1,8 @@ +resources: + jobs: + outer: + name: outer job + tasks: + - task_key: run job task 1 + run_job_task: + job_id: ${invalid.reference} diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml new file mode 100644 index 000000000..899d6d850 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml @@ -0,0 +1,5 @@ +resources: + models: + mymodel: + latest_versions: + - creation_timestamp: ${invalid.reference} diff --git a/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml new file mode 100644 index 000000000..ebb8ecf6b --- /dev/null +++ b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml @@ -0,0 +1,9 @@ +resources: + jobs: + foo: + name: my job + tasks: + # All tasks need to have a task_key. + - notebook_task: + notebook_path: /Users/abc/notebooks/inner + existing_cluster_id: abcd diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml new file mode 100644 index 000000000..7e7e0d2d3 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml @@ -0,0 +1,5 @@ +resources: + jobs: + myjob: + # unknown fields should cause schema failure. + unknown_field: "value" diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml new file mode 100644 index 000000000..a00c20935 --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml @@ -0,0 +1,6 @@ +resources: + models: + mymodel: + creation_timestamp: 123 + description: "my model" + unknown: "value" diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml new file mode 100644 index 000000000..e8a8866bc --- /dev/null +++ b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml @@ -0,0 +1 @@ +unknown: value diff --git a/bundle/internal/schema/testdata/pass/artifact_references.yml b/bundle/internal/schema/testdata/pass/artifact_references.yml new file mode 100644 index 000000000..c9b137633 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/artifact_references.yml @@ -0,0 +1,11 @@ +artifacts: + abc: + path: /Workspace/a/b/c + type: wheel + files: + - source: ./x.whl + +resources: + jobs: + foo: + name: ${artifacts.abc.type} diff --git a/bundle/internal/schema/testdata/pass/basic.yml b/bundle/internal/schema/testdata/pass/basic.yml new file mode 100644 index 000000000..de02d20bc --- /dev/null +++ b/bundle/internal/schema/testdata/pass/basic.yml @@ -0,0 +1,2 @@ +bundle: + name: basic diff --git a/bundle/internal/schema/testdata/pass/direct_value_in_target.yml b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml new file mode 100644 index 000000000..5033d8cd9 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml @@ -0,0 +1,4 @@ +targets: + development: + variables: + myvar: value diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml new file mode 100644 index 000000000..d9b0e832f --- /dev/null +++ b/bundle/internal/schema/testdata/pass/job.yml @@ -0,0 +1,63 @@ +bundle: + name: a job + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + continuous: + pause_status: PAUSED + edit_mode: EDITABLE + max_concurrent_runs: 10 + description: "my job description" + email_notifications: + no_alert_for_skipped_runs: true + environments: + - environment_key: venv + spec: + dependencies: + - python=3.7 + client: "myclient" + format: MULTI_TASK + tags: + foo: bar + bar: baz + tasks: + - task_key: mytask + notebook_task: + notebook_path: ${var.simplevar} + existing_cluster_id: abcd + - task_key: mytask2 + for_each_task: + inputs: av + concurrency: 10 + task: + task_key: inside_for_each + notebook_task: + notebook_path: ${var.complexvar.key3[0]} + - ${var.complexvar} diff --git a/bundle/internal/schema/testdata/pass/ml.yml b/bundle/internal/schema/testdata/pass/ml.yml new file mode 100644 index 000000000..b1558f101 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/ml.yml @@ -0,0 +1,72 @@ +bundle: + name: ML + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: "true" + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +resources: + models: + mymodel: + creation_timestamp: 123 + description: "my model" + latest_versions: + - creation_timestamp: 123 + tags: ${var.complexvar.key1} + status: READY + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + experiments: + myexperiment: + artifact_location: /dbfs/myexperiment + last_update_time: ${var.complexvar.key2} + lifecycle_stage: ${var.simplevar} + permissions: + - service_principal_name: myserviceprincipal + level: CAN_MANAGE + + model_serving_endpoints: + myendpoint: + config: + served_models: + - model_name: ${resources.models.mymodel.name} + model_version: abc + scale_to_zero_enabled: true + workload_size: Large + name: myendpoint + + schemas: + myschema: + catalog_name: mycatalog + name: myschema + + registered_models: + myregisteredmodel: + catalog_name: mycatalog + name: myregisteredmodel + schema_name: ${resources.schemas.myschema.name} + grants: + - principal: abcd + privileges: + - SELECT + - INSERT diff --git a/bundle/internal/schema/testdata/pass/pipeline.yml b/bundle/internal/schema/testdata/pass/pipeline.yml new file mode 100644 index 000000000..1b2b1a10f --- /dev/null +++ b/bundle/internal/schema/testdata/pass/pipeline.yml @@ -0,0 +1,54 @@ +bundle: + name: a pipeline + +workspace: + host: "https://myworkspace.com" + root_path: /abc + +presets: + name_prefix: "[DEV]" + jobs_max_concurrent_runs: 10 + +variables: + simplevar: + default: true + description: "simplevar description" + + complexvar: + default: + key1: value1 + key2: value2 + key3: + - value3 + - value4 + description: "complexvar description" + +artifacts: + mywheel: + path: ./mywheel.whl + type: WHEEL + +run_as: + service_principal_name: myserviceprincipal + +resources: + jobs: + myjob: + name: myjob + tasks: + - task_key: ${bundle.name} pipeline trigger + pipeline_task: + pipeline_id: ${resources.mypipeline.id} + + pipelines: + mypipeline: + name: mypipeline + libraries: + - whl: ./mywheel.whl + catalog: 3{var.complexvar.key2} + development: true + clusters: + - autoscale: + mode: ENHANCED + max_workers: 10 + min_workers: 1 diff --git a/bundle/internal/schema/testdata/pass/quality_monitor.yml b/bundle/internal/schema/testdata/pass/quality_monitor.yml new file mode 100644 index 000000000..a9be59329 --- /dev/null +++ b/bundle/internal/schema/testdata/pass/quality_monitor.yml @@ -0,0 +1,16 @@ +bundle: + name: quality_monitor + +resources: + quality_monitors: + myqualitymonitor: + inference_log: + granularities: + - a + - b + model_id_col: a + prediction_col: b + timestamp_col: c + problem_type: PROBLEM_TYPE_CLASSIFICATION + assets_dir: /dbfs/mnt/abc + output_schema_name: default diff --git a/bundle/internal/schema/testdata/pass/run_job_task.yml b/bundle/internal/schema/testdata/pass/run_job_task.yml new file mode 100644 index 000000000..be2ca22cd --- /dev/null +++ b/bundle/internal/schema/testdata/pass/run_job_task.yml @@ -0,0 +1,56 @@ +bundle: + name: a run job task + databricks_cli_version: 0.200.0 + compute_id: "mycompute" + + +variables: + simplevar: + default: 5678 + description: "simplevar description" + + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: "complexvar description" + +resources: + jobs: + inner: + permissions: + - user_name: user1 + level: CAN_MANAGE + + name: inner job + tasks: + - task_key: inner notebook task + notebook_task: + notebook_path: /Users/abc/notebooks/inner + existing_cluster_id: abcd + + outer: + name: outer job + tasks: + - task_key: run job task 1 + run_job_task: + job_id: 1234 + + - task_key: run job task 2 + run_job_task: + job_id: ${var.complexvar.key1} + + - task_key: run job task 3 + run_job_task: + job_id: ${var.simplevar} + + - task_key: run job task 4 + run_job_task: + job_id: ${resources.inner.id} + + - task_key: run job task 5 + run_job_task: + job_id: ${var.complexvar.key3[1]} diff --git a/bundle/internal/schema/testdata/pass/schema.yml b/bundle/internal/schema/testdata/pass/schema.yml new file mode 100644 index 000000000..37d0f6f7a --- /dev/null +++ b/bundle/internal/schema/testdata/pass/schema.yml @@ -0,0 +1,24 @@ +bundle: + name: basic + +variables: + complexvar: + default: + key1: 1234 + key2: value2 + key3: + - value3 + - 9999 + description: complexvar description + +resources: + schemas: + myschema: + name: myschema + catalog_name: main + grants: + - ${var.complexvar} + - principal: ${workspace.current_user.me} + privileges: + - ${var.complexvar.key3[0]} + - ${var.complexvar.key2} diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index 86d762439..b31fdf153 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -51,9 +51,15 @@ func (r *root) Generate(path string) error { } func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { - // Generate types for resources. + // Generate types for resources var resources []*namedBlock for _, k := range sortKeys(schema.ResourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. + if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.ResourceSchemas[k] b := &namedBlock{ filePattern: "resource_%s.go", @@ -71,6 +77,12 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error // Generate types for data sources. var dataSources []*namedBlock for _, k := range sortKeys(schema.DataSourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. + if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.DataSourceSchemas[k] b := &namedBlock{ filePattern: "data_source_%s.go", diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index efb297243..b71ea7d1c 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" diff --git a/bundle/internal/tf/schema/data_source_clusters.go b/bundle/internal/tf/schema/data_source_clusters.go index 7a5f3053d..8c5f9578e 100644 --- a/bundle/internal/tf/schema/data_source_clusters.go +++ b/bundle/internal/tf/schema/data_source_clusters.go @@ -2,8 +2,16 @@ package schema -type DataSourceClusters struct { - ClusterNameContains string `json:"cluster_name_contains,omitempty"` - Id string `json:"id,omitempty"` - Ids []string `json:"ids,omitempty"` +type DataSourceClustersFilterBy struct { + ClusterSources []string `json:"cluster_sources,omitempty"` + ClusterStates []string `json:"cluster_states,omitempty"` + IsPinned bool `json:"is_pinned,omitempty"` + PolicyId string `json:"policy_id,omitempty"` +} + +type DataSourceClusters struct { + ClusterNameContains string `json:"cluster_name_contains,omitempty"` + Id string `json:"id,omitempty"` + Ids []string `json:"ids,omitempty"` + FilterBy *DataSourceClustersFilterBy `json:"filter_by,omitempty"` } diff --git a/bundle/internal/tf/schema/data_source_external_location.go b/bundle/internal/tf/schema/data_source_external_location.go index a3e78cbd3..e1ad9dc3d 100644 --- a/bundle/internal/tf/schema/data_source_external_location.go +++ b/bundle/internal/tf/schema/data_source_external_location.go @@ -19,6 +19,7 @@ type DataSourceExternalLocationExternalLocationInfo struct { CreatedBy string `json:"created_by,omitempty"` CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name,omitempty"` + Fallback bool `json:"fallback,omitempty"` IsolationMode string `json:"isolation_mode,omitempty"` MetastoreId string `json:"metastore_id,omitempty"` Name string `json:"name,omitempty"` diff --git a/bundle/internal/tf/schema/data_source_share.go b/bundle/internal/tf/schema/data_source_share.go index 3b40fbb51..da9afaaef 100644 --- a/bundle/internal/tf/schema/data_source_share.go +++ b/bundle/internal/tf/schema/data_source_share.go @@ -18,12 +18,14 @@ type DataSourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition []DataSourceShareObjectPartition `json:"partition,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go index e95639de8..5d7f6a140 100644 --- a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go @@ -2,20 +2,14 @@ package schema -type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails struct { - ForcedForComplianceMode bool `json:"forced_for_compliance_mode,omitempty"` - UnavailableForDisabledEntitlement bool `json:"unavailable_for_disabled_entitlement,omitempty"` - UnavailableForNonEnterpriseTier bool `json:"unavailable_for_non_enterprise_tier,omitempty"` -} - type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime struct { - Hours int `json:"hours,omitempty"` - Minutes int `json:"minutes,omitempty"` + Hours int `json:"hours"` + Minutes int `json:"minutes"` } type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedSchedule struct { - DayOfWeek string `json:"day_of_week,omitempty"` - Frequency string `json:"frequency,omitempty"` + DayOfWeek string `json:"day_of_week"` + Frequency string `json:"frequency"` WindowStartTime *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime `json:"window_start_time,omitempty"` } @@ -25,9 +19,9 @@ type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspa type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspace struct { CanToggle bool `json:"can_toggle,omitempty"` - Enabled bool `json:"enabled,omitempty"` + Enabled bool `json:"enabled"` + EnablementDetails []any `json:"enablement_details,omitempty"` RestartEvenIfNoUpdatesAvailable bool `json:"restart_even_if_no_updates_available,omitempty"` - EnablementDetails *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails `json:"enablement_details,omitempty"` MaintenanceWindow *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindow `json:"maintenance_window,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_cluster.go b/bundle/internal/tf/schema/resource_cluster.go index e4106d049..4ae063c89 100644 --- a/bundle/internal/tf/schema/resource_cluster.go +++ b/bundle/internal/tf/schema/resource_cluster.go @@ -176,6 +176,7 @@ type ResourceCluster struct { IdempotencyToken string `json:"idempotency_token,omitempty"` InstancePoolId string `json:"instance_pool_id,omitempty"` IsPinned bool `json:"is_pinned,omitempty"` + NoWait bool `json:"no_wait,omitempty"` NodeTypeId string `json:"node_type_id,omitempty"` NumWorkers int `json:"num_workers,omitempty"` PolicyId string `json:"policy_id,omitempty"` diff --git a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go index 50815f753..8265adaed 100644 --- a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go @@ -3,8 +3,8 @@ package schema type ResourceComplianceSecurityProfileWorkspaceSettingComplianceSecurityProfileWorkspace struct { - ComplianceStandards []string `json:"compliance_standards,omitempty"` - IsEnabled bool `json:"is_enabled,omitempty"` + ComplianceStandards []string `json:"compliance_standards"` + IsEnabled bool `json:"is_enabled"` } type ResourceComplianceSecurityProfileWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go index 2f552402a..e9c3b0abb 100644 --- a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go @@ -3,7 +3,7 @@ package schema type ResourceEnhancedSecurityMonitoringWorkspaceSettingEnhancedSecurityMonitoringWorkspace struct { - IsEnabled bool `json:"is_enabled,omitempty"` + IsEnabled bool `json:"is_enabled"` } type ResourceEnhancedSecurityMonitoringWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index 379807a5d..29d55cd5f 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -95,14 +95,16 @@ type ResourceModelServingConfigServedEntities struct { } type ResourceModelServingConfigServedModels struct { - EnvironmentVars map[string]string `json:"environment_vars,omitempty"` - InstanceProfileArn string `json:"instance_profile_arn,omitempty"` - ModelName string `json:"model_name"` - ModelVersion string `json:"model_version"` - Name string `json:"name,omitempty"` - ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` - WorkloadSize string `json:"workload_size"` - WorkloadType string `json:"workload_type,omitempty"` + EnvironmentVars map[string]string `json:"environment_vars,omitempty"` + InstanceProfileArn string `json:"instance_profile_arn,omitempty"` + MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"` + MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + Name string `json:"name,omitempty"` + ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` + WorkloadSize string `json:"workload_size,omitempty"` + WorkloadType string `json:"workload_type,omitempty"` } type ResourceModelServingConfigTrafficConfigRoutes struct { diff --git a/bundle/internal/tf/schema/resource_share.go b/bundle/internal/tf/schema/resource_share.go index e531e7770..37f4d4546 100644 --- a/bundle/internal/tf/schema/resource_share.go +++ b/bundle/internal/tf/schema/resource_share.go @@ -18,20 +18,27 @@ type ResourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition []ResourceShareObjectPartition `json:"partition,omitempty"` } type ResourceShare struct { - CreatedAt int `json:"created_at,omitempty"` - CreatedBy string `json:"created_by,omitempty"` - Id string `json:"id,omitempty"` - Name string `json:"name"` - Owner string `json:"owner,omitempty"` - Object []ResourceShareObject `json:"object,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Owner string `json:"owner,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + StorageRoot string `json:"storage_root,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Object []ResourceShareObject `json:"object,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_sql_table.go b/bundle/internal/tf/schema/resource_sql_table.go index 51fb3bc0d..4f305c52e 100644 --- a/bundle/internal/tf/schema/resource_sql_table.go +++ b/bundle/internal/tf/schema/resource_sql_table.go @@ -15,6 +15,7 @@ type ResourceSqlTable struct { ClusterKeys []string `json:"cluster_keys,omitempty"` Comment string `json:"comment,omitempty"` DataSourceFormat string `json:"data_source_format,omitempty"` + EffectiveProperties map[string]string `json:"effective_properties,omitempty"` Id string `json:"id,omitempty"` Name string `json:"name"` Options map[string]string `json:"options,omitempty"` diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index ebdb7f095..5fc34d6b4 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" func NewRoot() *Root { return &Root{ diff --git a/bundle/libraries/expand_glob_references_test.go b/bundle/libraries/expand_glob_references_test.go index e7f2e1693..2dfbddb74 100644 --- a/bundle/libraries/expand_glob_references_test.go +++ b/bundle/libraries/expand_glob_references_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" @@ -61,7 +62,7 @@ func TestGlobReferencesExpandedForTaskLibraries(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) @@ -146,7 +147,7 @@ func TestGlobReferencesExpandedForForeachTaskLibraries(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) @@ -221,7 +222,7 @@ func TestGlobReferencesExpandedForEnvironmentsDeps(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 49544227e..097c561eb 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/sync" terraformlib "github.com/databricks/cli/libs/terraform" tfjson "github.com/hashicorp/terraform-json" ) @@ -128,7 +129,7 @@ properties such as the 'catalog' or 'storage' are changed:` } // The deploy phase deploys artifacts and resources. -func Deploy() bundle.Mutator { +func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // Core mutators that CRUD resources and modify deployment state. These // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( @@ -157,7 +158,7 @@ func Deploy() bundle.Mutator { libraries.ExpandGlobReferences(), libraries.Upload(), python.TransformWheelTask(), - files.Upload(), + files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/bundle/schema/README.md b/bundle/schema/README.md deleted file mode 100644 index bf6b87df6..000000000 --- a/bundle/schema/README.md +++ /dev/null @@ -1,18 +0,0 @@ -### Overview - -`docs/bundle_descriptions.json` contains both autogenerated as well as manually written -descriptions for the json schema. Specifically -1. `resources` : almost all descriptions are autogenerated from the OpenAPI spec -2. `targets` : almost all descriptions are copied over from root level entities (eg: `bundle`, `artifacts`) -3. `bundle` : manually editted -4. `include` : manually editted -5. `workspace` : manually editted -6. `artifacts` : manually editted - -These descriptions are rendered in the inline documentation in an IDE - -### SOP: Add schema descriptions for new fields in bundle config - -Manually edit bundle_descriptions.json to add your descriptions. Note that the -descriptions in `resources` block is generated from the OpenAPI spec, and thus -any changes there will be overwritten. diff --git a/bundle/schema/docs.go b/bundle/schema/docs.go deleted file mode 100644 index 6e9289f92..000000000 --- a/bundle/schema/docs.go +++ /dev/null @@ -1,109 +0,0 @@ -package schema - -import ( - _ "embed" - "encoding/json" - "fmt" - "os" - "reflect" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/jsonschema" -) - -// A subset of Schema struct -type Docs struct { - Description string `json:"description"` - Properties map[string]*Docs `json:"properties,omitempty"` - Items *Docs `json:"items,omitempty"` - AdditionalProperties *Docs `json:"additionalproperties,omitempty"` -} - -//go:embed docs/bundle_descriptions.json -var bundleDocs []byte - -func (docs *Docs) refreshTargetsDocs() error { - targetsDocs, ok := docs.Properties["targets"] - if !ok || targetsDocs.AdditionalProperties == nil || - targetsDocs.AdditionalProperties.Properties == nil { - return fmt.Errorf("invalid targets descriptions") - } - targetProperties := targetsDocs.AdditionalProperties.Properties - propertiesToCopy := []string{"artifacts", "bundle", "resources", "workspace"} - for _, p := range propertiesToCopy { - targetProperties[p] = docs.Properties[p] - } - return nil -} - -func LoadBundleDescriptions() (*Docs, error) { - embedded := Docs{} - err := json.Unmarshal(bundleDocs, &embedded) - return &embedded, err -} - -func UpdateBundleDescriptions(openapiSpecPath string) (*Docs, error) { - embedded, err := LoadBundleDescriptions() - if err != nil { - return nil, err - } - - // Generate schema from the embedded descriptions, and convert it back to docs. - // This creates empty descriptions for any properties that were missing in the - // embedded descriptions. - schema, err := New(reflect.TypeOf(config.Root{}), embedded) - if err != nil { - return nil, err - } - docs := schemaToDocs(schema) - - // Load the Databricks OpenAPI spec - openapiSpec, err := os.ReadFile(openapiSpecPath) - if err != nil { - return nil, err - } - spec := &Specification{} - err = json.Unmarshal(openapiSpec, spec) - if err != nil { - return nil, err - } - openapiReader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - - // Generate descriptions for the "resources" field - resourcesDocs, err := openapiReader.ResourcesDocs() - if err != nil { - return nil, err - } - resourceSchema, err := New(reflect.TypeOf(config.Resources{}), resourcesDocs) - if err != nil { - return nil, err - } - docs.Properties["resources"] = schemaToDocs(resourceSchema) - docs.refreshTargetsDocs() - return docs, nil -} - -// *Docs are a subset of *Schema, this function selects that subset -func schemaToDocs(jsonSchema *jsonschema.Schema) *Docs { - // terminate recursion if schema is nil - if jsonSchema == nil { - return nil - } - docs := &Docs{ - Description: jsonSchema.Description, - } - if len(jsonSchema.Properties) > 0 { - docs.Properties = make(map[string]*Docs) - } - for k, v := range jsonSchema.Properties { - docs.Properties[k] = schemaToDocs(v) - } - docs.Items = schemaToDocs(jsonSchema.Items) - if additionalProperties, ok := jsonSchema.AdditionalProperties.(*jsonschema.Schema); ok { - docs.AdditionalProperties = schemaToDocs(additionalProperties) - } - return docs -} diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json deleted file mode 100644 index f03a44802..000000000 --- a/bundle/schema/docs/bundle_descriptions.json +++ /dev/null @@ -1,6447 +0,0 @@ -{ - "description": "", - "properties": { - "artifacts": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "build": { - "description": "" - }, - "executable": { - "description": "" - }, - "files": { - "description": "", - "items": { - "description": "", - "properties": { - "source": { - "description": "" - } - } - } - }, - "path": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "bundle": { - "description": "", - "properties": { - "compute_id": { - "description": "" - }, - "databricks_cli_version": { - "description": "" - }, - "deployment": { - "description": "", - "properties": { - "fail_on_active_runs": { - "description": "" - }, - "lock": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "force": { - "description": "" - } - } - } - } - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "name": { - "description": "" - } - } - }, - "experimental": { - "description": "", - "properties": { - "pydabs": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "import": { - "description": "", - "items": { - "description": "" - } - }, - "venv_path": { - "description": "" - } - } - }, - "python_wheel_wrapper": { - "description": "" - }, - "scripts": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "use_legacy_run_as": { - "description": "" - } - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "presets": { - "description": "", - "properties": { - "jobs_max_concurrent_runs": { - "description": "" - }, - "name_prefix": { - "description": "" - }, - "pipelines_development": { - "description": "" - }, - "tags": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "trigger_pause_status": { - "description": "" - } - } - }, - "resources": { - "description": "Collection of Databricks resources to deploy.", - "properties": { - "experiments": { - "description": "List of MLflow experiments", - "additionalproperties": { - "description": "", - "properties": { - "artifact_location": { - "description": "Location where artifacts for the experiment are stored." - }, - "creation_time": { - "description": "Creation time" - }, - "experiment_id": { - "description": "Unique identifier for the experiment." - }, - "last_update_time": { - "description": "Last update time" - }, - "lifecycle_stage": { - "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." - }, - "name": { - "description": "Human readable name that identifies the experiment." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - } - } - } - }, - "jobs": { - "description": "List of Databricks jobs", - "additionalproperties": { - "description": "", - "properties": { - "continuous": { - "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", - "properties": { - "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." - } - } - }, - "deployment": { - "description": "Deployment information for jobs managed by external sources.", - "properties": { - "kind": { - "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle." - }, - "metadata_file_path": { - "description": "Path of the file that contains deployment metadata." - } - } - }, - "description": { - "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding." - }, - "edit_mode": { - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified." - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environments": { - "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", - "items": { - "description": "", - "properties": { - "environment_key": { - "description": "The key of an environment. It has to be unique within a job." - }, - "spec": { - "description": "", - "properties": { - "client": { - "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version." - }, - "dependencies": { - "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", - "items": { - "description": "" - } - } - } - } - } - } - }, - "format": { - "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`." - }, - "git_source": { - "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", - "properties": { - "git_branch": { - "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit." - }, - "git_commit": { - "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag." - }, - "git_provider": { - "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive." - }, - "git_snapshot": { - "description": "", - "properties": { - "used_commit": { - "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to." - } - } - }, - "git_tag": { - "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit." - }, - "git_url": { - "description": "URL of the repository to be cloned by this job." - }, - "job_source": { - "description": "The source of the job specification in the remote repository when the job is source controlled.", - "properties": { - "dirty_state": { - "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced." - }, - "import_from_git_branch": { - "description": "Name of the branch which the job is imported from." - }, - "job_config_path": { - "description": "Path of the job YAML file that contains the job specification." - } - } - } - } - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", - "items": { - "description": "", - "properties": { - "job_cluster_key": { - "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." - }, - "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - } - } - } - }, - "max_concurrent_runs": { - "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped." - }, - "name": { - "description": "An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding." - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", - "properties": { - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "parameters": { - "description": "Job-level parameter definitions", - "items": { - "description": "", - "properties": { - "default": { - "description": "Default value of the parameter." - }, - "name": { - "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "queue": { - "description": "The queue settings of the job.", - "properties": { - "enabled": { - "description": "If true, enable queueing for the job. This is a required field." - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." - }, - "user_name": { - "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." - } - } - }, - "schedule": { - "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "pause_status": { - "description": "Indicate whether this schedule is paused or not." - }, - "quartz_cron_expression": { - "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required." - }, - "timezone_id": { - "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required." - } - } - }, - "tags": { - "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.", - "additionalproperties": { - "description": "" - } - }, - "tasks": { - "description": "A list of task specifications to be executed by this job.", - "items": { - "description": "", - "properties": { - "condition_task": { - "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", - "properties": { - "left": { - "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." - }, - "op": { - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison." - }, - "right": { - "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." - } - } - }, - "dbt_task": { - "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", - "properties": { - "catalog": { - "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1." - }, - "commands": { - "description": "A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", - "items": { - "description": "" - } - }, - "profiles_directory": { - "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used." - }, - "project_directory": { - "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used." - }, - "schema": { - "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used." - }, - "source": { - "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider." - }, - "warehouse_id": { - "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument." - } - } - }, - "depends_on": { - "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", - "items": { - "description": "", - "properties": { - "outcome": { - "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." - }, - "task_key": { - "description": "The name of the task this task depends on." - } - } - } - }, - "description": { - "description": "An optional description for this task." - }, - "disable_auto_optimization": { - "description": "An option to disable auto optimization in serverless" - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environment_key": { - "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute." - }, - "existing_cluster_id": { - "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability" - }, - "for_each_task": { - "description": "" - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_cluster_key": { - "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`." - }, - "libraries": { - "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", - "items": { - "description": "", - "properties": { - "cran": { - "description": "Specification of a CRAN library to be installed as part of the library", - "properties": { - "package": { - "description": "The name of the CRAN package to install." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default CRAN repo is used." - } - } - }, - "egg": { - "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above." - }, - "jar": { - "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - }, - "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "pypi": { - "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", - "properties": { - "package": { - "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\"." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default pip index is\nused." - } - } - }, - "requirements": { - "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`" - }, - "whl": { - "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - } - } - } - }, - "max_retries": { - "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry." - }, - "min_retry_interval_millis": { - "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." - }, - "new_cluster": { - "description": "If new_cluster, a description of a new cluster that is created for each run.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - }, - "notebook_task": { - "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", - "properties": { - "base_parameters": { - "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run\nNow with parameters specified, the two parameters maps are merged. If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", - "additionalproperties": { - "description": "" - } - }, - "notebook_path": { - "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider." - }, - "warehouse_id": { - "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail." - } - } - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", - "properties": { - "alert_on_last_attempt": { - "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." - }, - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "pipeline_task": { - "description": "If pipeline_task, indicates that this task must execute a Pipeline.", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - }, - "pipeline_id": { - "description": "The full name of the pipeline task to execute." - } - } - }, - "python_wheel_task": { - "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", - "properties": { - "entry_point": { - "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`" - }, - "named_parameters": { - "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", - "additionalproperties": { - "description": "" - } - }, - "package_name": { - "description": "Name of the package to execute" - }, - "parameters": { - "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", - "items": { - "description": "" - } - } - } - }, - "retry_on_timeout": { - "description": "An optional policy to specify whether to retry a job when it times out. The default behavior\nis to not retry on timeout." - }, - "run_if": { - "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed" - }, - "run_job_task": { - "description": "If run_job_task, indicates that this task must execute another job.", - "properties": { - "dbt_commands": { - "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", - "items": { - "description": "" - } - }, - "jar_params": { - "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\\"#parameter-variables\\\") to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "job_id": { - "description": "ID of the job to trigger." - }, - "job_parameters": { - "description": "Job-level parameters used to trigger the job.", - "additionalproperties": { - "description": "" - } - }, - "notebook_params": { - "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", - "additionalproperties": { - "description": "" - } - }, - "pipeline_params": { - "description": "", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - } - } - }, - "python_named_params": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "python_params": { - "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "spark_submit_params": { - "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "sql_params": { - "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - } - } - }, - "spark_jar_task": { - "description": "If spark_jar_task, indicates that this task must run a JAR.", - "properties": { - "jar_uri": { - "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create." - }, - "main_class_name": { - "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." - }, - "parameters": { - "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "spark_python_task": { - "description": "If spark_python_task, indicates that this task must run a Python file.", - "properties": { - "parameters": { - "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "python_file": { - "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository." - } - } - }, - "spark_submit_task": { - "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", - "properties": { - "parameters": { - "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "sql_task": { - "description": "If sql_task, indicates that this job must execute a SQL task.", - "properties": { - "alert": { - "description": "If alert, indicates that this job must refresh a SQL alert.", - "properties": { - "alert_id": { - "description": "The canonical identifier of the SQL alert." - }, - "pause_subscriptions": { - "description": "If true, the alert notifications are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, alert notifications are sent to subscribers.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "dashboard": { - "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", - "properties": { - "custom_subject": { - "description": "Subject of the email sent to subscribers of this task." - }, - "dashboard_id": { - "description": "The canonical identifier of the SQL dashboard." - }, - "pause_subscriptions": { - "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, dashboard snapshots are sent to subscriptions.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "file": { - "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", - "properties": { - "path": { - "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths." - }, - "source": { - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider." - } - } - }, - "parameters": { - "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - }, - "query": { - "description": "If query, indicates that this job must execute a SQL query.", - "properties": { - "query_id": { - "description": "The canonical identifier of the SQL query." - } - } - }, - "warehouse_id": { - "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs." - } - } - }, - "task_key": { - "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset." - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout." - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout." - }, - "trigger": { - "description": "A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "file_arrival": { - "description": "File arrival trigger settings.", - "properties": { - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds" - }, - "url": { - "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "pause_status": { - "description": "Whether this trigger is paused or not." - }, - "periodic": { - "description": "Periodic trigger settings.", - "properties": { - "interval": { - "description": "The interval at which the trigger should run." - }, - "unit": { - "description": "The unit of time for the interval." - } - } - }, - "table": { - "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "table_update": { - "description": "", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - } - } - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "model_serving_endpoints": { - "description": "List of Model Serving Endpoints", - "additionalproperties": { - "description": "", - "properties": { - "config": { - "description": "The core config of the serving endpoint.", - "properties": { - "auto_capture_config": { - "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", - "properties": { - "catalog_name": { - "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled." - }, - "enabled": { - "description": "Indicates whether the inference table is enabled." - }, - "schema_name": { - "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled." - }, - "table_name_prefix": { - "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled." - } - } - }, - "served_entities": { - "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", - "items": { - "description": "", - "properties": { - "entity_name": { - "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n" - }, - "entity_version": { - "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC." - }, - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "external_model": { - "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", - "properties": { - "ai21labs_config": { - "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", - "properties": { - "ai21labs_api_key": { - "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - }, - "ai21labs_api_key_plaintext": { - "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - } - } - }, - "amazon_bedrock_config": { - "description": "Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'.", - "properties": { - "aws_access_key_id": { - "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_access_key_id_plaintext": { - "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_region": { - "description": "The AWS region to use. Bedrock has to be enabled there." - }, - "aws_secret_access_key": { - "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "aws_secret_access_key_plaintext": { - "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "bedrock_provider": { - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon." - } - } - }, - "anthropic_config": { - "description": "Anthropic Config. Only required if the provider is 'anthropic'.", - "properties": { - "anthropic_api_key": { - "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - }, - "anthropic_api_key_plaintext": { - "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - } - } - }, - "cohere_config": { - "description": "Cohere Config. Only required if the provider is 'cohere'.", - "properties": { - "cohere_api_base": { - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" - }, - "cohere_api_key": { - "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - }, - "cohere_api_key_plaintext": { - "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - } - } - }, - "databricks_model_serving_config": { - "description": "Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.", - "properties": { - "databricks_api_token": { - "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_api_token_plaintext": { - "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_workspace_url": { - "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n" - } - } - }, - "google_cloud_vertex_ai_config": { - "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", - "properties": { - "private_key": { - "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`" - }, - "private_key_plaintext": { - "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`." - }, - "project_id": { - "description": "This is the Google Cloud project id that the service account is associated with." - }, - "region": { - "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions." - } - } - }, - "name": { - "description": "The name of the external model." - }, - "openai_config": { - "description": "OpenAI Config. Only required if the provider is 'openai'.", - "properties": { - "microsoft_entra_client_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n" - }, - "microsoft_entra_client_secret": { - "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_client_secret_plaintext": { - "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_tenant_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n" - }, - "openai_api_base": { - "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n" - }, - "openai_api_key": { - "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_key_plaintext": { - "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_type": { - "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n" - }, - "openai_api_version": { - "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n" - }, - "openai_deployment_name": { - "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n" - }, - "openai_organization": { - "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n" - } - } - }, - "palm_config": { - "description": "PaLM Config. Only required if the provider is 'palm'.", - "properties": { - "palm_api_key": { - "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - }, - "palm_api_key_plaintext": { - "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - } - } - }, - "provider": { - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n" - }, - "task": { - "description": "The task type of the external model." - } - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served entity uses to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "name": { - "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served entity should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n" - }, - "workload_type": { - "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "served_models": { - "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", - "items": { - "description": "", - "properties": { - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served model will use to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "model_name": { - "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" - }, - "model_version": { - "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served." - }, - "name": { - "description": "The name of a served model. It must be unique across an endpoint. If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served model should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n" - }, - "workload_type": { - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "traffic_config": { - "description": "The traffic config defining how invocations to the serving endpoint should be routed.", - "properties": { - "routes": { - "description": "The list of routes that define traffic to each served entity.", - "items": { - "description": "", - "properties": { - "served_model_name": { - "description": "The name of the served model this route configures traffic for." - }, - "traffic_percentage": { - "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive." - } - } - } - } - } - } - } - }, - "name": { - "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "rate_limits": { - "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", - "items": { - "description": "", - "properties": { - "calls": { - "description": "Used to specify how many calls are allowed for a key within the renewal_period." - }, - "key": { - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified." - }, - "renewal_period": { - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported." - } - } - } - }, - "route_optimized": { - "description": "Enable route optimization for the serving endpoint." - }, - "tags": { - "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "Key field for a serving endpoint tag." - }, - "value": { - "description": "Optional value field for a serving endpoint tag." - } - } - } - } - } - } - }, - "models": { - "description": "List of MLflow models", - "additionalproperties": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `registered_model` was created." - }, - "description": { - "description": "Description of this `registered_model`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `registered_model` was last updated." - }, - "latest_versions": { - "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", - "items": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `model_version` was created." - }, - "current_stage": { - "description": "Current stage for this `model_version`." - }, - "description": { - "description": "Description of this `model_version`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `model_version` was last updated." - }, - "name": { - "description": "Unique name of the model" - }, - "run_id": { - "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." - }, - "run_link": { - "description": "Run Link: Direct link to the run that generated this version" - }, - "source": { - "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" - }, - "status": { - "description": "Current status of `model_version`" - }, - "status_message": { - "description": "Details on current `status`, if it is pending or failed." - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `model_version`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `model_version`." - }, - "version": { - "description": "Model's version number." - } - } - } - }, - "name": { - "description": "Unique name for the model." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `registered_model`" - } - } - } - }, - "pipelines": { - "description": "List of DLT pipelines", - "additionalproperties": { - "description": "", - "properties": { - "catalog": { - "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." - }, - "channel": { - "description": "DLT Release Channel that specifies which version to use." - }, - "clusters": { - "description": "Cluster settings for this pipeline deployment.", - "items": { - "description": "", - "properties": { - "apply_policy_default_values": { - "description": "Note: This field won't be persisted. Only API users will check this field." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - }, - "mode": { - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n" - } - } - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable local disk encryption for the cluster." - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "label": { - "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - } - } - } - }, - "configuration": { - "description": "String-String configuration for this pipeline execution.", - "additionalproperties": { - "description": "" - } - }, - "continuous": { - "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`." - }, - "deployment": { - "description": "Deployment type of this pipeline.", - "properties": { - "kind": { - "description": "The deployment method that manages the pipeline." - }, - "metadata_file_path": { - "description": "The path to the file containing metadata about the deployment." - } - } - }, - "development": { - "description": "Whether the pipeline is in Development mode. Defaults to false." - }, - "edition": { - "description": "Pipeline product edition." - }, - "filters": { - "description": "Filters on which Pipeline packages to include in the deployed graph.", - "properties": { - "exclude": { - "description": "Paths to exclude.", - "items": { - "description": "" - } - }, - "include": { - "description": "Paths to include.", - "items": { - "description": "" - } - } - } - }, - "gateway_definition": { - "description": "The definition of a gateway pipeline to support CDC.", - "properties": { - "connection_id": { - "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source." - }, - "gateway_storage_catalog": { - "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location." - }, - "gateway_storage_name": { - "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n" - }, - "gateway_storage_schema": { - "description": "Required, Immutable. The name of the schema for the gateway pipelines's storage location." - } - } - }, - "id": { - "description": "Unique identifier for this pipeline." - }, - "ingestion_definition": { - "description": "The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", - "properties": { - "connection_name": { - "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "ingestion_gateway_id": { - "description": "Immutable. Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "objects": { - "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", - "items": { - "description": "", - "properties": { - "schema": { - "description": "Select tables from a specific source schema.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store tables." - }, - "destination_schema": { - "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists." - }, - "source_catalog": { - "description": "The source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Required. Schema name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "table": { - "description": "Select tables from a specific source table.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store table." - }, - "destination_schema": { - "description": "Required. Destination schema to store table." - }, - "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used." - }, - "source_catalog": { - "description": "Source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Schema name in the source database. Might be optional depending on the type of source." - }, - "source_table": { - "description": "Required. Table name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - } - } - } - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "libraries": { - "description": "Libraries or code needed by this deployment.", - "items": { - "description": "", - "properties": { - "file": { - "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", - "properties": { - "path": { - "description": "The absolute path of the file." - } - } - }, - "jar": { - "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" - }, - "maven": { - "description": "Specification of a maven library to be installed.\n", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", - "properties": { - "path": { - "description": "The absolute path of the notebook." - } - } - }, - "whl": { - "description": "URI of the whl to be installed." - } - } - } - }, - "name": { - "description": "Friendly identifier for this pipeline." - }, - "notifications": { - "description": "List of notification settings for this pipeline.", - "items": { - "description": "", - "properties": { - "alerts": { - "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", - "items": { - "description": "" - } - }, - "email_recipients": { - "description": "A list of email addresses notified when a configured alert is triggered.\n", - "items": { - "description": "" - } - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "photon": { - "description": "Whether Photon is enabled for this pipeline." - }, - "serverless": { - "description": "Whether serverless compute is enabled for this pipeline." - }, - "storage": { - "description": "DBFS root directory for storing checkpoints and tables." - }, - "target": { - "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." - }, - "trigger": { - "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", - "properties": { - "cron": { - "description": "", - "properties": { - "quartz_cron_schedule": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "manual": { - "description": "" - } - } - } - } - } - }, - "quality_monitors": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "assets_dir": { - "description": "" - }, - "baseline_table_name": { - "description": "" - }, - "custom_metrics": { - "description": "", - "items": { - "description": "", - "properties": { - "definition": { - "description": "" - }, - "input_columns": { - "description": "", - "items": { - "description": "" - } - }, - "name": { - "description": "" - }, - "output_data_type": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "data_classification_config": { - "description": "", - "properties": { - "enabled": { - "description": "" - } - } - }, - "inference_log": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "label_col": { - "description": "" - }, - "model_id_col": { - "description": "" - }, - "prediction_col": { - "description": "" - }, - "prediction_proba_col": { - "description": "" - }, - "problem_type": { - "description": "" - }, - "timestamp_col": { - "description": "" - } - } - }, - "notifications": { - "description": "", - "properties": { - "on_failure": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "on_new_classification_tag_detected": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - } - } - }, - "output_schema_name": { - "description": "" - }, - "schedule": { - "description": "", - "properties": { - "pause_status": { - "description": "" - }, - "quartz_cron_expression": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "skip_builtin_dashboard": { - "description": "" - }, - "slicing_exprs": { - "description": "", - "items": { - "description": "" - } - }, - "snapshot": { - "description": "" - }, - "time_series": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "timestamp_col": { - "description": "" - } - } - }, - "warehouse_id": { - "description": "" - } - } - } - }, - "registered_models": { - "description": "List of Registered Models", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "The name of the catalog where the schema and the registered model reside" - }, - "comment": { - "description": "The comment attached to the registered model" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "The name of the registered model" - }, - "schema_name": { - "description": "The name of the schema where the registered model resides" - }, - "storage_location": { - "description": "The storage location on the cloud under which model version data files are stored" - } - } - } - }, - "schemas": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "" - }, - "comment": { - "description": "" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "" - }, - "properties": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "storage_root": { - "description": "" - } - } - } - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - }, - "sync": { - "description": "", - "properties": { - "exclude": { - "description": "", - "items": { - "description": "" - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "paths": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "targets": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "artifacts": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "build": { - "description": "" - }, - "executable": { - "description": "" - }, - "files": { - "description": "", - "items": { - "description": "", - "properties": { - "source": { - "description": "" - } - } - } - }, - "path": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "bundle": { - "description": "", - "properties": { - "compute_id": { - "description": "" - }, - "databricks_cli_version": { - "description": "" - }, - "deployment": { - "description": "", - "properties": { - "fail_on_active_runs": { - "description": "" - }, - "lock": { - "description": "", - "properties": { - "enabled": { - "description": "" - }, - "force": { - "description": "" - } - } - } - } - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "name": { - "description": "" - } - } - }, - "compute_id": { - "description": "" - }, - "default": { - "description": "" - }, - "git": { - "description": "", - "properties": { - "branch": { - "description": "" - }, - "origin_url": { - "description": "" - } - } - }, - "mode": { - "description": "" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "presets": { - "description": "", - "properties": { - "jobs_max_concurrent_runs": { - "description": "" - }, - "name_prefix": { - "description": "" - }, - "pipelines_development": { - "description": "" - }, - "tags": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "trigger_pause_status": { - "description": "" - } - } - }, - "resources": { - "description": "Collection of Databricks resources to deploy.", - "properties": { - "experiments": { - "description": "List of MLflow experiments", - "additionalproperties": { - "description": "", - "properties": { - "artifact_location": { - "description": "Location where artifacts for the experiment are stored." - }, - "creation_time": { - "description": "Creation time" - }, - "experiment_id": { - "description": "Unique identifier for the experiment." - }, - "last_update_time": { - "description": "Last update time" - }, - "lifecycle_stage": { - "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." - }, - "name": { - "description": "Human readable name that identifies the experiment." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - } - } - } - }, - "jobs": { - "description": "List of Databricks jobs", - "additionalproperties": { - "description": "", - "properties": { - "continuous": { - "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", - "properties": { - "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." - } - } - }, - "deployment": { - "description": "Deployment information for jobs managed by external sources.", - "properties": { - "kind": { - "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle." - }, - "metadata_file_path": { - "description": "Path of the file that contains deployment metadata." - } - } - }, - "description": { - "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding." - }, - "edit_mode": { - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified." - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environments": { - "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", - "items": { - "description": "", - "properties": { - "environment_key": { - "description": "The key of an environment. It has to be unique within a job." - }, - "spec": { - "description": "", - "properties": { - "client": { - "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version." - }, - "dependencies": { - "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", - "items": { - "description": "" - } - } - } - } - } - } - }, - "format": { - "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`." - }, - "git_source": { - "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", - "properties": { - "git_branch": { - "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit." - }, - "git_commit": { - "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag." - }, - "git_provider": { - "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive." - }, - "git_snapshot": { - "description": "", - "properties": { - "used_commit": { - "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to." - } - } - }, - "git_tag": { - "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit." - }, - "git_url": { - "description": "URL of the repository to be cloned by this job." - }, - "job_source": { - "description": "The source of the job specification in the remote repository when the job is source controlled.", - "properties": { - "dirty_state": { - "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced." - }, - "import_from_git_branch": { - "description": "Name of the branch which the job is imported from." - }, - "job_config_path": { - "description": "Path of the job YAML file that contains the job specification." - } - } - } - } - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", - "items": { - "description": "", - "properties": { - "job_cluster_key": { - "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." - }, - "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - } - } - } - }, - "max_concurrent_runs": { - "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped." - }, - "name": { - "description": "An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding." - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", - "properties": { - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "parameters": { - "description": "Job-level parameter definitions", - "items": { - "description": "", - "properties": { - "default": { - "description": "Default value of the parameter." - }, - "name": { - "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "queue": { - "description": "The queue settings of the job.", - "properties": { - "enabled": { - "description": "If true, enable queueing for the job. This is a required field." - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." - }, - "user_name": { - "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." - } - } - }, - "schedule": { - "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "pause_status": { - "description": "Indicate whether this schedule is paused or not." - }, - "quartz_cron_expression": { - "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required." - }, - "timezone_id": { - "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required." - } - } - }, - "tags": { - "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.", - "additionalproperties": { - "description": "" - } - }, - "tasks": { - "description": "A list of task specifications to be executed by this job.", - "items": { - "description": "", - "properties": { - "condition_task": { - "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", - "properties": { - "left": { - "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." - }, - "op": { - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison." - }, - "right": { - "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." - } - } - }, - "dbt_task": { - "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", - "properties": { - "catalog": { - "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1." - }, - "commands": { - "description": "A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", - "items": { - "description": "" - } - }, - "profiles_directory": { - "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used." - }, - "project_directory": { - "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used." - }, - "schema": { - "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used." - }, - "source": { - "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider." - }, - "warehouse_id": { - "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument." - } - } - }, - "depends_on": { - "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", - "items": { - "description": "", - "properties": { - "outcome": { - "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." - }, - "task_key": { - "description": "The name of the task this task depends on." - } - } - } - }, - "description": { - "description": "An optional description for this task." - }, - "disable_auto_optimization": { - "description": "An option to disable auto optimization in serverless" - }, - "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", - "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, - "on_duration_warning_threshold_exceeded": { - "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", - "items": { - "description": "" - } - }, - "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_start": { - "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - }, - "on_streaming_backlog_exceeded": { - "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", - "items": { - "description": "" - } - }, - "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", - "items": { - "description": "" - } - } - } - }, - "environment_key": { - "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute." - }, - "existing_cluster_id": { - "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability" - }, - "for_each_task": { - "description": "" - }, - "health": { - "description": "", - "properties": { - "rules": { - "description": "", - "items": { - "description": "", - "properties": { - "metric": { - "description": "" - }, - "op": { - "description": "" - }, - "value": { - "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule." - } - } - } - } - } - }, - "job_cluster_key": { - "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`." - }, - "libraries": { - "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", - "items": { - "description": "", - "properties": { - "cran": { - "description": "Specification of a CRAN library to be installed as part of the library", - "properties": { - "package": { - "description": "The name of the CRAN package to install." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default CRAN repo is used." - } - } - }, - "egg": { - "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above." - }, - "jar": { - "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - }, - "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "pypi": { - "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", - "properties": { - "package": { - "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\"." - }, - "repo": { - "description": "The repository where the package can be found. If not specified, the default pip index is\nused." - } - } - }, - "requirements": { - "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`" - }, - "whl": { - "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." - } - } - } - }, - "max_retries": { - "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry." - }, - "min_retry_interval_millis": { - "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." - }, - "new_cluster": { - "description": "If new_cluster, a description of a new cluster that is created for each run.", - "properties": { - "apply_policy_default_values": { - "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - } - } - }, - "autotermination_minutes": { - "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination." - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "cluster_name": { - "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n" - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "data_security_mode": { - "description": "" - }, - "docker_image": { - "description": "", - "properties": { - "basic_auth": { - "description": "", - "properties": { - "password": { - "description": "Password of the user" - }, - "username": { - "description": "Name of the user" - } - } - }, - "url": { - "description": "URL of the docker image." - } - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n" - }, - "enable_elastic_disk": { - "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable LUKS on cluster VMs' local disks" - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "runtime_engine": { - "description": "" - }, - "single_user_name": { - "description": "Single user name if data_security_mode is `SINGLE_USER`" - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "spark_version": { - "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n" - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - }, - "workload_type": { - "description": "", - "properties": { - "clients": { - "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", - "properties": { - "jobs": { - "description": "With jobs set, the cluster can be used for jobs" - }, - "notebooks": { - "description": "With notebooks set, this cluster can be used for notebooks" - } - } - } - } - } - } - }, - "notebook_task": { - "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", - "properties": { - "base_parameters": { - "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run\nNow with parameters specified, the two parameters maps are merged. If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", - "additionalproperties": { - "description": "" - } - }, - "notebook_path": { - "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider." - }, - "warehouse_id": { - "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail." - } - } - }, - "notification_settings": { - "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", - "properties": { - "alert_on_last_attempt": { - "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." - }, - "no_alert_for_canceled_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." - }, - "no_alert_for_skipped_runs": { - "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." - } - } - }, - "pipeline_task": { - "description": "If pipeline_task, indicates that this task must execute a Pipeline.", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - }, - "pipeline_id": { - "description": "The full name of the pipeline task to execute." - } - } - }, - "python_wheel_task": { - "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", - "properties": { - "entry_point": { - "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`" - }, - "named_parameters": { - "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", - "additionalproperties": { - "description": "" - } - }, - "package_name": { - "description": "Name of the package to execute" - }, - "parameters": { - "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", - "items": { - "description": "" - } - } - } - }, - "retry_on_timeout": { - "description": "An optional policy to specify whether to retry a job when it times out. The default behavior\nis to not retry on timeout." - }, - "run_if": { - "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed" - }, - "run_job_task": { - "description": "If run_job_task, indicates that this task must execute another job.", - "properties": { - "dbt_commands": { - "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", - "items": { - "description": "" - } - }, - "jar_params": { - "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\\"#parameter-variables\\\") to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "job_id": { - "description": "ID of the job to trigger." - }, - "job_parameters": { - "description": "Job-level parameters used to trigger the job.", - "additionalproperties": { - "description": "" - } - }, - "notebook_params": { - "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", - "additionalproperties": { - "description": "" - } - }, - "pipeline_params": { - "description": "", - "properties": { - "full_refresh": { - "description": "If true, triggers a full refresh on the delta live table." - } - } - }, - "python_named_params": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "python_params": { - "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "spark_submit_params": { - "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", - "items": { - "description": "" - } - }, - "sql_params": { - "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - } - } - }, - "spark_jar_task": { - "description": "If spark_jar_task, indicates that this task must run a JAR.", - "properties": { - "jar_uri": { - "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create." - }, - "main_class_name": { - "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." - }, - "parameters": { - "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "spark_python_task": { - "description": "If spark_python_task, indicates that this task must run a Python file.", - "properties": { - "parameters": { - "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - }, - "python_file": { - "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." - }, - "source": { - "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository." - } - } - }, - "spark_submit_task": { - "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", - "properties": { - "parameters": { - "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", - "items": { - "description": "" - } - } - } - }, - "sql_task": { - "description": "If sql_task, indicates that this job must execute a SQL task.", - "properties": { - "alert": { - "description": "If alert, indicates that this job must refresh a SQL alert.", - "properties": { - "alert_id": { - "description": "The canonical identifier of the SQL alert." - }, - "pause_subscriptions": { - "description": "If true, the alert notifications are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, alert notifications are sent to subscribers.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "dashboard": { - "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", - "properties": { - "custom_subject": { - "description": "Subject of the email sent to subscribers of this task." - }, - "dashboard_id": { - "description": "The canonical identifier of the SQL dashboard." - }, - "pause_subscriptions": { - "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers." - }, - "subscriptions": { - "description": "If specified, dashboard snapshots are sent to subscriptions.", - "items": { - "description": "", - "properties": { - "destination_id": { - "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications." - }, - "user_name": { - "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications." - } - } - } - } - } - }, - "file": { - "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", - "properties": { - "path": { - "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths." - }, - "source": { - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider." - } - } - }, - "parameters": { - "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", - "additionalproperties": { - "description": "" - } - }, - "query": { - "description": "If query, indicates that this job must execute a SQL query.", - "properties": { - "query_id": { - "description": "The canonical identifier of the SQL query." - } - } - }, - "warehouse_id": { - "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs." - } - } - }, - "task_key": { - "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset." - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout." - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "timeout_seconds": { - "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout." - }, - "trigger": { - "description": "A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", - "properties": { - "file_arrival": { - "description": "File arrival trigger settings.", - "properties": { - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds" - }, - "url": { - "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "pause_status": { - "description": "Whether this trigger is paused or not." - }, - "periodic": { - "description": "Periodic trigger settings.", - "properties": { - "interval": { - "description": "The interval at which the trigger should run." - }, - "unit": { - "description": "The unit of time for the interval." - } - } - }, - "table": { - "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - }, - "table_update": { - "description": "", - "properties": { - "condition": { - "description": "The table(s) condition based on which to trigger a job run." - }, - "min_time_between_triggers_seconds": { - "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds." - }, - "table_names": { - "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", - "items": { - "description": "" - } - }, - "wait_after_last_change_seconds": { - "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds." - } - } - } - } - }, - "webhook_notifications": { - "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", - "properties": { - "on_duration_warning_threshold_exceeded": { - "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_failure": { - "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_start": { - "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_streaming_backlog_exceeded": { - "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - }, - "on_success": { - "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", - "items": { - "description": "", - "properties": { - "id": { - "description": "" - } - } - } - } - } - } - } - } - }, - "model_serving_endpoints": { - "description": "List of Model Serving Endpoints", - "additionalproperties": { - "description": "", - "properties": { - "config": { - "description": "The core config of the serving endpoint.", - "properties": { - "auto_capture_config": { - "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", - "properties": { - "catalog_name": { - "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled." - }, - "enabled": { - "description": "Indicates whether the inference table is enabled." - }, - "schema_name": { - "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled." - }, - "table_name_prefix": { - "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled." - } - } - }, - "served_entities": { - "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", - "items": { - "description": "", - "properties": { - "entity_name": { - "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n" - }, - "entity_version": { - "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC." - }, - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "external_model": { - "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", - "properties": { - "ai21labs_config": { - "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", - "properties": { - "ai21labs_api_key": { - "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - }, - "ai21labs_api_key_plaintext": { - "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`." - } - } - }, - "amazon_bedrock_config": { - "description": "Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'.", - "properties": { - "aws_access_key_id": { - "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_access_key_id_plaintext": { - "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`." - }, - "aws_region": { - "description": "The AWS region to use. Bedrock has to be enabled there." - }, - "aws_secret_access_key": { - "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "aws_secret_access_key_plaintext": { - "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`." - }, - "bedrock_provider": { - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon." - } - } - }, - "anthropic_config": { - "description": "Anthropic Config. Only required if the provider is 'anthropic'.", - "properties": { - "anthropic_api_key": { - "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - }, - "anthropic_api_key_plaintext": { - "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`." - } - } - }, - "cohere_config": { - "description": "Cohere Config. Only required if the provider is 'cohere'.", - "properties": { - "cohere_api_base": { - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" - }, - "cohere_api_key": { - "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - }, - "cohere_api_key_plaintext": { - "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`." - } - } - }, - "databricks_model_serving_config": { - "description": "Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.", - "properties": { - "databricks_api_token": { - "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_api_token_plaintext": { - "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n" - }, - "databricks_workspace_url": { - "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n" - } - } - }, - "google_cloud_vertex_ai_config": { - "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", - "properties": { - "private_key": { - "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`" - }, - "private_key_plaintext": { - "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`." - }, - "project_id": { - "description": "This is the Google Cloud project id that the service account is associated with." - }, - "region": { - "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions." - } - } - }, - "name": { - "description": "The name of the external model." - }, - "openai_config": { - "description": "OpenAI Config. Only required if the provider is 'openai'.", - "properties": { - "microsoft_entra_client_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n" - }, - "microsoft_entra_client_secret": { - "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_client_secret_plaintext": { - "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n" - }, - "microsoft_entra_tenant_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n" - }, - "openai_api_base": { - "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n" - }, - "openai_api_key": { - "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_key_plaintext": { - "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`." - }, - "openai_api_type": { - "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n" - }, - "openai_api_version": { - "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n" - }, - "openai_deployment_name": { - "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n" - }, - "openai_organization": { - "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n" - } - } - }, - "palm_config": { - "description": "PaLM Config. Only required if the provider is 'palm'.", - "properties": { - "palm_api_key": { - "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - }, - "palm_api_key_plaintext": { - "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`." - } - } - }, - "provider": { - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n" - }, - "task": { - "description": "The task type of the external model." - } - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served entity uses to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "name": { - "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served entity should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n" - }, - "workload_type": { - "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "served_models": { - "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", - "items": { - "description": "", - "properties": { - "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", - "additionalproperties": { - "description": "" - } - }, - "instance_profile_arn": { - "description": "ARN of the instance profile that the served model will use to access AWS resources." - }, - "max_provisioned_throughput": { - "description": "The maximum tokens per second that the endpoint can scale up to." - }, - "min_provisioned_throughput": { - "description": "The minimum tokens per second that the endpoint can scale down to." - }, - "model_name": { - "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n" - }, - "model_version": { - "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served." - }, - "name": { - "description": "The name of a served model. It must be unique across an endpoint. If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served model should scale down to zero." - }, - "workload_size": { - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n" - }, - "workload_type": { - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n" - } - } - } - }, - "traffic_config": { - "description": "The traffic config defining how invocations to the serving endpoint should be routed.", - "properties": { - "routes": { - "description": "The list of routes that define traffic to each served entity.", - "items": { - "description": "", - "properties": { - "served_model_name": { - "description": "The name of the served model this route configures traffic for." - }, - "traffic_percentage": { - "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive." - } - } - } - } - } - } - } - }, - "name": { - "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n" - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "rate_limits": { - "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", - "items": { - "description": "", - "properties": { - "calls": { - "description": "Used to specify how many calls are allowed for a key within the renewal_period." - }, - "key": { - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified." - }, - "renewal_period": { - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported." - } - } - } - }, - "route_optimized": { - "description": "Enable route optimization for the serving endpoint." - }, - "tags": { - "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", - "items": { - "description": "", - "properties": { - "key": { - "description": "Key field for a serving endpoint tag." - }, - "value": { - "description": "Optional value field for a serving endpoint tag." - } - } - } - } - } - } - }, - "models": { - "description": "List of MLflow models", - "additionalproperties": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `registered_model` was created." - }, - "description": { - "description": "Description of this `registered_model`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `registered_model` was last updated." - }, - "latest_versions": { - "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", - "items": { - "description": "", - "properties": { - "creation_timestamp": { - "description": "Timestamp recorded when this `model_version` was created." - }, - "current_stage": { - "description": "Current stage for this `model_version`." - }, - "description": { - "description": "Description of this `model_version`." - }, - "last_updated_timestamp": { - "description": "Timestamp recorded when metadata for this `model_version` was last updated." - }, - "name": { - "description": "Unique name of the model" - }, - "run_id": { - "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." - }, - "run_link": { - "description": "Run Link: Direct link to the run that generated this version" - }, - "source": { - "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" - }, - "status": { - "description": "Current status of `model_version`" - }, - "status_message": { - "description": "Details on current `status`, if it is pending or failed." - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `model_version`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `model_version`." - }, - "version": { - "description": "Model's version number." - } - } - } - }, - "name": { - "description": "Unique name for the model." - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "tags": { - "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", - "items": { - "description": "", - "properties": { - "key": { - "description": "The tag key." - }, - "value": { - "description": "The tag value." - } - } - } - }, - "user_id": { - "description": "User that created this `registered_model`" - } - } - } - }, - "pipelines": { - "description": "List of DLT pipelines", - "additionalproperties": { - "description": "", - "properties": { - "catalog": { - "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." - }, - "channel": { - "description": "DLT Release Channel that specifies which version to use." - }, - "clusters": { - "description": "Cluster settings for this pipeline deployment.", - "items": { - "description": "", - "properties": { - "apply_policy_default_values": { - "description": "Note: This field won't be persisted. Only API users will check this field." - }, - "autoscale": { - "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", - "properties": { - "max_workers": { - "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`." - }, - "min_workers": { - "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation." - }, - "mode": { - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n" - } - } - }, - "aws_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "ebs_volume_count": { - "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden." - }, - "ebs_volume_iops": { - "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_size": { - "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096." - }, - "ebs_volume_throughput": { - "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used." - }, - "ebs_volume_type": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "instance_profile_arn": { - "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists." - }, - "spot_bid_price_percent": { - "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent." - }, - "zone_id": { - "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method." - } - } - }, - "azure_attributes": { - "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "first_on_demand": { - "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster." - }, - "log_analytics_info": { - "description": "Defines values necessary to configure and run Azure Log Analytics agent", - "properties": { - "log_analytics_primary_key": { - "description": "\u003cneeds content added\u003e" - }, - "log_analytics_workspace_id": { - "description": "\u003cneeds content added\u003e" - } - } - }, - "spot_bid_max_price": { - "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1." - } - } - }, - "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", - "properties": { - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - } - } - }, - "custom_tags": { - "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", - "additionalproperties": { - "description": "" - } - }, - "driver_instance_pool_id": { - "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." - }, - "driver_node_type_id": { - "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above." - }, - "enable_local_disk_encryption": { - "description": "Whether to enable local disk encryption for the cluster." - }, - "gcp_attributes": { - "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", - "properties": { - "availability": { - "description": "" - }, - "boot_disk_size": { - "description": "boot disk size in GB" - }, - "google_service_account": { - "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." - }, - "local_ssd_count": { - "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." - }, - "use_preemptible_executors": { - "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead." - }, - "zone_id": { - "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones." - } - } - }, - "init_scripts": { - "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", - "items": { - "description": "", - "properties": { - "abfss": { - "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", - "properties": { - "destination": { - "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`." - } - } - }, - "dbfs": { - "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", - "properties": { - "destination": { - "description": "dbfs destination, e.g. `dbfs:/my/path`" - } - } - }, - "file": { - "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", - "properties": { - "destination": { - "description": "local file destination, e.g. `file:/my/local/file.sh`" - } - } - }, - "gcs": { - "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", - "properties": { - "destination": { - "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`" - } - } - }, - "s3": { - "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", - "properties": { - "canned_acl": { - "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." - }, - "destination": { - "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." - }, - "enable_encryption": { - "description": "(Optional) Flag to enable server side encryption, `false` by default." - }, - "encryption_type": { - "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." - }, - "endpoint": { - "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." - }, - "kms_key": { - "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." - }, - "region": { - "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." - } - } - }, - "volumes": { - "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`" - } - } - }, - "workspace": { - "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", - "properties": { - "destination": { - "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" - } - } - } - } - } - }, - "instance_pool_id": { - "description": "The optional ID of the instance pool to which the cluster belongs." - }, - "label": { - "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." - }, - "node_type_id": { - "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" - }, - "num_workers": { - "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned." - }, - "policy_id": { - "description": "The ID of the cluster policy used to create the cluster if applicable." - }, - "spark_conf": { - "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", - "additionalproperties": { - "description": "" - } - }, - "spark_env_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", - "additionalproperties": { - "description": "" - } - }, - "ssh_public_keys": { - "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", - "items": { - "description": "" - } - } - } - } - }, - "configuration": { - "description": "String-String configuration for this pipeline execution.", - "additionalproperties": { - "description": "" - } - }, - "continuous": { - "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`." - }, - "deployment": { - "description": "Deployment type of this pipeline.", - "properties": { - "kind": { - "description": "The deployment method that manages the pipeline." - }, - "metadata_file_path": { - "description": "The path to the file containing metadata about the deployment." - } - } - }, - "development": { - "description": "Whether the pipeline is in Development mode. Defaults to false." - }, - "edition": { - "description": "Pipeline product edition." - }, - "filters": { - "description": "Filters on which Pipeline packages to include in the deployed graph.", - "properties": { - "exclude": { - "description": "Paths to exclude.", - "items": { - "description": "" - } - }, - "include": { - "description": "Paths to include.", - "items": { - "description": "" - } - } - } - }, - "gateway_definition": { - "description": "The definition of a gateway pipeline to support CDC.", - "properties": { - "connection_id": { - "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source." - }, - "gateway_storage_catalog": { - "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location." - }, - "gateway_storage_name": { - "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n" - }, - "gateway_storage_schema": { - "description": "Required, Immutable. The name of the schema for the gateway pipelines's storage location." - } - } - }, - "id": { - "description": "Unique identifier for this pipeline." - }, - "ingestion_definition": { - "description": "The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", - "properties": { - "connection_name": { - "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "ingestion_gateway_id": { - "description": "Immutable. Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name." - }, - "objects": { - "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", - "items": { - "description": "", - "properties": { - "schema": { - "description": "Select tables from a specific source schema.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store tables." - }, - "destination_schema": { - "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists." - }, - "source_catalog": { - "description": "The source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Required. Schema name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "table": { - "description": "Select tables from a specific source table.", - "properties": { - "destination_catalog": { - "description": "Required. Destination catalog to store table." - }, - "destination_schema": { - "description": "Required. Destination schema to store table." - }, - "destination_table": { - "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used." - }, - "source_catalog": { - "description": "Source catalog name. Might be optional depending on the type of source." - }, - "source_schema": { - "description": "Schema name in the source database. Might be optional depending on the type of source." - }, - "source_table": { - "description": "Required. Table name in the source database." - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - } - } - } - }, - "table_configuration": { - "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.", - "properties": { - "primary_keys": { - "description": "The primary key of the table used to apply changes.", - "items": { - "description": "" - } - }, - "salesforce_include_formula_fields": { - "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector" - }, - "scd_type": { - "description": "The SCD type to use to ingest the table." - } - } - } - } - }, - "libraries": { - "description": "Libraries or code needed by this deployment.", - "items": { - "description": "", - "properties": { - "file": { - "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", - "properties": { - "path": { - "description": "The absolute path of the file." - } - } - }, - "jar": { - "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" - }, - "maven": { - "description": "Specification of a maven library to be installed.\n", - "properties": { - "coordinates": { - "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." - }, - "exclusions": { - "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", - "items": { - "description": "" - } - }, - "repo": { - "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched." - } - } - }, - "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", - "properties": { - "path": { - "description": "The absolute path of the notebook." - } - } - }, - "whl": { - "description": "URI of the whl to be installed." - } - } - } - }, - "name": { - "description": "Friendly identifier for this pipeline." - }, - "notifications": { - "description": "List of notification settings for this pipeline.", - "items": { - "description": "", - "properties": { - "alerts": { - "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", - "items": { - "description": "" - } - }, - "email_recipients": { - "description": "A list of email addresses notified when a configured alert is triggered.\n", - "items": { - "description": "" - } - } - } - } - }, - "permissions": { - "description": "", - "items": { - "description": "", - "properties": { - "group_name": { - "description": "" - }, - "level": { - "description": "" - }, - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - } - }, - "photon": { - "description": "Whether Photon is enabled for this pipeline." - }, - "serverless": { - "description": "Whether serverless compute is enabled for this pipeline." - }, - "storage": { - "description": "DBFS root directory for storing checkpoints and tables." - }, - "target": { - "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." - }, - "trigger": { - "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", - "properties": { - "cron": { - "description": "", - "properties": { - "quartz_cron_schedule": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "manual": { - "description": "" - } - } - } - } - } - }, - "quality_monitors": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "assets_dir": { - "description": "" - }, - "baseline_table_name": { - "description": "" - }, - "custom_metrics": { - "description": "", - "items": { - "description": "", - "properties": { - "definition": { - "description": "" - }, - "input_columns": { - "description": "", - "items": { - "description": "" - } - }, - "name": { - "description": "" - }, - "output_data_type": { - "description": "" - }, - "type": { - "description": "" - } - } - } - }, - "data_classification_config": { - "description": "", - "properties": { - "enabled": { - "description": "" - } - } - }, - "inference_log": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "label_col": { - "description": "" - }, - "model_id_col": { - "description": "" - }, - "prediction_col": { - "description": "" - }, - "prediction_proba_col": { - "description": "" - }, - "problem_type": { - "description": "" - }, - "timestamp_col": { - "description": "" - } - } - }, - "notifications": { - "description": "", - "properties": { - "on_failure": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "on_new_classification_tag_detected": { - "description": "", - "properties": { - "email_addresses": { - "description": "", - "items": { - "description": "" - } - } - } - } - } - }, - "output_schema_name": { - "description": "" - }, - "schedule": { - "description": "", - "properties": { - "pause_status": { - "description": "" - }, - "quartz_cron_expression": { - "description": "" - }, - "timezone_id": { - "description": "" - } - } - }, - "skip_builtin_dashboard": { - "description": "" - }, - "slicing_exprs": { - "description": "", - "items": { - "description": "" - } - }, - "snapshot": { - "description": "" - }, - "time_series": { - "description": "", - "properties": { - "granularities": { - "description": "", - "items": { - "description": "" - } - }, - "timestamp_col": { - "description": "" - } - } - }, - "warehouse_id": { - "description": "" - } - } - } - }, - "registered_models": { - "description": "List of Registered Models", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "The name of the catalog where the schema and the registered model reside" - }, - "comment": { - "description": "The comment attached to the registered model" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "The name of the registered model" - }, - "schema_name": { - "description": "The name of the schema where the registered model resides" - }, - "storage_location": { - "description": "The storage location on the cloud under which model version data files are stored" - } - } - } - }, - "schemas": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "catalog_name": { - "description": "" - }, - "comment": { - "description": "" - }, - "grants": { - "description": "", - "items": { - "description": "", - "properties": { - "principal": { - "description": "" - }, - "privileges": { - "description": "", - "items": { - "description": "" - } - } - } - } - }, - "name": { - "description": "" - }, - "properties": { - "description": "", - "additionalproperties": { - "description": "" - } - }, - "storage_root": { - "description": "" - } - } - } - } - } - }, - "run_as": { - "description": "", - "properties": { - "service_principal_name": { - "description": "" - }, - "user_name": { - "description": "" - } - } - }, - "sync": { - "description": "", - "properties": { - "exclude": { - "description": "", - "items": { - "description": "" - } - }, - "include": { - "description": "", - "items": { - "description": "" - } - }, - "paths": { - "description": "", - "items": { - "description": "" - } - } - } - }, - "variables": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "default": { - "description": "" - }, - "description": { - "description": "" - }, - "lookup": { - "description": "", - "properties": { - "alert": { - "description": "" - }, - "cluster": { - "description": "" - }, - "cluster_policy": { - "description": "" - }, - "dashboard": { - "description": "" - }, - "instance_pool": { - "description": "" - }, - "job": { - "description": "" - }, - "metastore": { - "description": "" - }, - "pipeline": { - "description": "" - }, - "query": { - "description": "" - }, - "service_principal": { - "description": "" - }, - "warehouse": { - "description": "" - } - } - }, - "type": { - "description": "" - } - } - } - }, - "workspace": { - "description": "", - "properties": { - "artifact_path": { - "description": "" - }, - "auth_type": { - "description": "" - }, - "azure_client_id": { - "description": "" - }, - "azure_environment": { - "description": "" - }, - "azure_login_app_id": { - "description": "" - }, - "azure_tenant_id": { - "description": "" - }, - "azure_use_msi": { - "description": "" - }, - "azure_workspace_resource_id": { - "description": "" - }, - "client_id": { - "description": "" - }, - "file_path": { - "description": "" - }, - "google_service_account": { - "description": "" - }, - "host": { - "description": "" - }, - "profile": { - "description": "" - }, - "root_path": { - "description": "" - }, - "state_path": { - "description": "" - } - } - } - } - } - }, - "variables": { - "description": "", - "additionalproperties": { - "description": "", - "properties": { - "default": { - "description": "" - }, - "description": { - "description": "" - }, - "lookup": { - "description": "", - "properties": { - "alert": { - "description": "" - }, - "cluster": { - "description": "" - }, - "cluster_policy": { - "description": "" - }, - "dashboard": { - "description": "" - }, - "instance_pool": { - "description": "" - }, - "job": { - "description": "" - }, - "metastore": { - "description": "" - }, - "pipeline": { - "description": "" - }, - "query": { - "description": "" - }, - "service_principal": { - "description": "" - }, - "warehouse": { - "description": "" - } - } - }, - "type": { - "description": "" - } - } - } - }, - "workspace": { - "description": "", - "properties": { - "artifact_path": { - "description": "" - }, - "auth_type": { - "description": "" - }, - "azure_client_id": { - "description": "" - }, - "azure_environment": { - "description": "" - }, - "azure_login_app_id": { - "description": "" - }, - "azure_tenant_id": { - "description": "" - }, - "azure_use_msi": { - "description": "" - }, - "azure_workspace_resource_id": { - "description": "" - }, - "client_id": { - "description": "" - }, - "file_path": { - "description": "" - }, - "google_service_account": { - "description": "" - }, - "host": { - "description": "" - }, - "profile": { - "description": "" - }, - "root_path": { - "description": "" - }, - "state_path": { - "description": "" - } - } - } - } -} \ No newline at end of file diff --git a/bundle/schema/docs_test.go b/bundle/schema/docs_test.go deleted file mode 100644 index 83ee681b0..000000000 --- a/bundle/schema/docs_test.go +++ /dev/null @@ -1,62 +0,0 @@ -package schema - -import ( - "encoding/json" - "testing" - - "github.com/databricks/cli/libs/jsonschema" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestSchemaToDocs(t *testing.T) { - jsonSchema := &jsonschema.Schema{ - Type: "object", - Description: "root doc", - Properties: map[string]*jsonschema.Schema{ - "foo": {Type: "number", Description: "foo doc"}, - "bar": {Type: "string"}, - "octave": { - Type: "object", - AdditionalProperties: &jsonschema.Schema{Type: "number"}, - Description: "octave docs", - }, - "scales": { - Type: "object", - Description: "scale docs", - Items: &jsonschema.Schema{Type: "string"}, - }, - }, - } - docs := schemaToDocs(jsonSchema) - docsJson, err := json.MarshalIndent(docs, " ", " ") - require.NoError(t, err) - - expected := - `{ - "description": "root doc", - "properties": { - "bar": { - "description": "" - }, - "foo": { - "description": "foo doc" - }, - "octave": { - "description": "octave docs", - "additionalproperties": { - "description": "" - } - }, - "scales": { - "description": "scale docs", - "items": { - "description": "" - } - } - } - }` - t.Log("[DEBUG] actual: ", string(docsJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(docsJson)) -} diff --git a/bundle/schema/embed.go b/bundle/schema/embed.go new file mode 100644 index 000000000..68f42a8e5 --- /dev/null +++ b/bundle/schema/embed.go @@ -0,0 +1,6 @@ +package schema + +import _ "embed" + +//go:embed jsonschema.json +var Bytes []byte diff --git a/bundle/schema/embed_test.go b/bundle/schema/embed_test.go new file mode 100644 index 000000000..ee0b5a615 --- /dev/null +++ b/bundle/schema/embed_test.go @@ -0,0 +1,71 @@ +package schema_test + +import ( + "encoding/json" + "testing" + + "github.com/databricks/cli/bundle/schema" + "github.com/databricks/cli/libs/jsonschema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func walk(defs map[string]any, p ...string) jsonschema.Schema { + v, ok := defs[p[0]] + if !ok { + panic("not found: " + p[0]) + } + + if len(p) == 1 { + b, err := json.Marshal(v) + if err != nil { + panic(err) + } + res := jsonschema.Schema{} + err = json.Unmarshal(b, &res) + if err != nil { + panic(err) + } + return res + } + + return walk(v.(map[string]any), p[1:]...) +} + +func TestJsonSchema(t *testing.T) { + s := jsonschema.Schema{} + err := json.Unmarshal(schema.Bytes, &s) + require.NoError(t, err) + + // Assert job fields have their descriptions loaded. + resourceJob := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Job") + fields := []string{"name", "continuous", "deployment", "tasks", "trigger"} + for _, field := range fields { + assert.NotEmpty(t, resourceJob.AnyOf[0].Properties[field].Description) + } + + // Assert descriptions were also loaded for a job task definition. + jobTask := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.Task") + fields = []string{"notebook_task", "spark_jar_task", "spark_python_task", "spark_submit_task", "description", "depends_on", "environment_key", "for_each_task", "existing_cluster_id"} + for _, field := range fields { + assert.NotEmpty(t, jobTask.AnyOf[0].Properties[field].Description) + } + + // Assert descriptions are loaded for pipelines + pipeline := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Pipeline") + fields = []string{"name", "catalog", "clusters", "channel", "continuous", "deployment", "development"} + for _, field := range fields { + assert.NotEmpty(t, pipeline.AnyOf[0].Properties[field].Description) + } + + // Assert enum values are loaded + schedule := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "catalog.MonitorCronSchedule") + assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "PAUSED") + assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "UNPAUSED") + + providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider") + assert.Contains(t, providers.Enum, "gitHub") + assert.Contains(t, providers.Enum, "bitbucketCloud") + assert.Contains(t, providers.Enum, "gitHubEnterprise") + assert.Contains(t, providers.Enum, "bitbucketServer") +} diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json new file mode 100644 index 000000000..2db1a5ab4 --- /dev/null +++ b/bundle/schema/jsonschema.json @@ -0,0 +1,5561 @@ +{ + "$defs": { + "bool": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "float64": { + "anyOf": [ + { + "type": "number" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Grant": { + "anyOf": [ + { + "type": "object", + "properties": { + "principal": { + "$ref": "#/$defs/string" + }, + "privileges": { + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "privileges", + "principal" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Job": { + "anyOf": [ + { + "type": "object", + "properties": { + "continuous": { + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Continuous" + }, + "deployment": { + "description": "Deployment information for jobs managed by external sources.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobDeployment" + }, + "description": { + "description": "An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding.", + "$ref": "#/$defs/string" + }, + "edit_mode": { + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEditMode" + }, + "email_notifications": { + "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEmailNotifications" + }, + "environments": { + "description": "A list of task execution environment specifications that can be referenced by tasks of this job.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment" + }, + "format": { + "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Format" + }, + "git_source": { + "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitSource" + }, + "health": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" + }, + "job_clusters": { + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" + }, + "max_concurrent_runs": { + "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.", + "$ref": "#/$defs/int" + }, + "name": { + "description": "An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding.", + "$ref": "#/$defs/string" + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobNotificationSettings" + }, + "parameters": { + "description": "Job-level parameter definitions", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "queue": { + "description": "The queue settings of the job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "schedule": { + "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.CronSchedule" + }, + "tags": { + "description": "A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.", + "$ref": "#/$defs/map/string" + }, + "tasks": { + "description": "A list of task specifications to be executed by this job.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" + }, + "timeout_seconds": { + "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout.", + "$ref": "#/$defs/int" + }, + "trigger": { + "description": "A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings" + }, + "webhook_notifications": { + "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowExperiment": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifact_location": { + "description": "Location where artifacts for the experiment are stored.", + "$ref": "#/$defs/string" + }, + "creation_time": { + "description": "Creation time", + "$ref": "#/$defs/int64" + }, + "experiment_id": { + "description": "Unique identifier for the experiment.", + "$ref": "#/$defs/string" + }, + "last_update_time": { + "description": "Last update time", + "$ref": "#/$defs/int64" + }, + "lifecycle_stage": { + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs.", + "$ref": "#/$defs/string" + }, + "name": { + "description": "Human readable name that identifies the experiment.", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "creation_timestamp": { + "description": "Timestamp recorded when this `registered_model` was created.", + "$ref": "#/$defs/int64" + }, + "description": { + "description": "Description of this `registered_model`.", + "$ref": "#/$defs/string" + }, + "last_updated_timestamp": { + "description": "Timestamp recorded when metadata for this `registered_model` was last updated.", + "$ref": "#/$defs/int64" + }, + "latest_versions": { + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelVersion" + }, + "name": { + "description": "Unique name for the model.", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelTag" + }, + "user_id": { + "description": "User that created this `registered_model`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.ModelServingEndpoint": { + "anyOf": [ + { + "type": "object", + "properties": { + "config": { + "description": "The core config of the serving endpoint.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput" + }, + "name": { + "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n", + "$ref": "#/$defs/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "rate_limits": { + "description": "Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.RateLimit" + }, + "route_optimized": { + "description": "Enable route optimization for the serving endpoint.", + "$ref": "#/$defs/bool" + }, + "tags": { + "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.EndpointTag" + } + }, + "additionalProperties": false, + "required": [ + "config", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Permission": { + "anyOf": [ + { + "type": "object", + "properties": { + "group_name": { + "$ref": "#/$defs/string" + }, + "level": { + "$ref": "#/$defs/string" + }, + "service_principal_name": { + "$ref": "#/$defs/string" + }, + "user_name": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "level" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Pipeline": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog": { + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog.", + "$ref": "#/$defs/string" + }, + "channel": { + "description": "DLT Release Channel that specifies which version to use.", + "$ref": "#/$defs/string" + }, + "clusters": { + "description": "Cluster settings for this pipeline deployment.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster" + }, + "configuration": { + "description": "String-String configuration for this pipeline execution.", + "$ref": "#/$defs/map/string" + }, + "continuous": { + "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`.", + "$ref": "#/$defs/bool" + }, + "deployment": { + "description": "Deployment type of this pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineDeployment" + }, + "development": { + "description": "Whether the pipeline is in Development mode. Defaults to false.", + "$ref": "#/$defs/bool" + }, + "edition": { + "description": "Pipeline product edition.", + "$ref": "#/$defs/string" + }, + "filters": { + "description": "Filters on which Pipeline packages to include in the deployed graph.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.Filters" + }, + "gateway_definition": { + "description": "The definition of a gateway pipeline to support CDC.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionGatewayPipelineDefinition" + }, + "id": { + "description": "Unique identifier for this pipeline.", + "$ref": "#/$defs/string" + }, + "ingestion_definition": { + "description": "The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinition" + }, + "libraries": { + "description": "Libraries or code needed by this deployment.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary" + }, + "name": { + "description": "Friendly identifier for this pipeline.", + "$ref": "#/$defs/string" + }, + "notifications": { + "description": "List of notification settings for this pipeline.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.Notifications" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "photon": { + "description": "Whether Photon is enabled for this pipeline.", + "$ref": "#/$defs/bool" + }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline.", + "$ref": "#/$defs/bool" + }, + "storage": { + "description": "DBFS root directory for storing checkpoints and tables.", + "$ref": "#/$defs/string" + }, + "target": { + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`.", + "$ref": "#/$defs/string" + }, + "trigger": { + "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.QualityMonitor": { + "anyOf": [ + { + "type": "object", + "properties": { + "assets_dir": { + "description": "The directory to store monitoring assets (e.g. dashboard, metric tables).", + "$ref": "#/$defs/string" + }, + "baseline_table_name": { + "description": "Name of the baseline table from which drift metrics are computed from.\nColumns in the monitored table should also be present in the baseline table.\n", + "$ref": "#/$defs/string" + }, + "custom_metrics": { + "description": "Custom metrics to compute on the monitored table. These can be aggregate metrics, derived\nmetrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time\nwindows).\n", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric" + }, + "data_classification_config": { + "description": "The data classification config for the monitor.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDataClassificationConfig" + }, + "inference_log": { + "description": "Configuration for monitoring inference logs.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog" + }, + "notifications": { + "description": "The notification settings for the monitor.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorNotifications" + }, + "output_schema_name": { + "description": "Schema where output metric tables are created.", + "$ref": "#/$defs/string" + }, + "schedule": { + "description": "The schedule for automatically updating and refreshing metric tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule" + }, + "skip_builtin_dashboard": { + "description": "Whether to skip creating a default dashboard summarizing data quality metrics.", + "$ref": "#/$defs/bool" + }, + "slicing_exprs": { + "description": "List of column expressions to slice data with for targeted analysis. The data is grouped by\neach expression independently, resulting in a separate slice for each predicate and its\ncomplements. For high-cardinality columns, only the top 100 unique values by frequency will\ngenerate slices.\n", + "$ref": "#/$defs/slice/string" + }, + "snapshot": { + "description": "Configuration for monitoring snapshot tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorSnapshot" + }, + "time_series": { + "description": "Configuration for monitoring time series tables.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries" + }, + "warehouse_id": { + "description": "Optional argument to specify the warehouse for dashboard creation. If not specified, the first running\nwarehouse will be used.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "assets_dir", + "output_schema_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.RegisteredModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "The name of the catalog where the schema and the registered model reside", + "$ref": "#/$defs/string" + }, + "comment": { + "description": "The comment attached to the registered model", + "$ref": "#/$defs/string" + }, + "grants": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Grant" + }, + "name": { + "description": "The name of the registered model", + "$ref": "#/$defs/string" + }, + "schema_name": { + "description": "The name of the schema where the registered model resides", + "$ref": "#/$defs/string" + }, + "storage_location": { + "description": "The storage location on the cloud under which model version data files are stored", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "catalog_name", + "name", + "schema_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Schema": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "Name of parent catalog.", + "$ref": "#/$defs/string" + }, + "comment": { + "description": "User-provided free-form text description.", + "$ref": "#/$defs/string" + }, + "grants": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Grant" + }, + "name": { + "description": "Name of schema, relative to parent catalog.", + "$ref": "#/$defs/string" + }, + "properties": { + "$ref": "#/$defs/map/string" + }, + "storage_root": { + "description": "Storage root URL for managed tables within schema.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "catalog_name", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.Lookup": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert": { + "$ref": "#/$defs/string" + }, + "cluster": { + "$ref": "#/$defs/string" + }, + "cluster_policy": { + "$ref": "#/$defs/string" + }, + "dashboard": { + "$ref": "#/$defs/string" + }, + "instance_pool": { + "$ref": "#/$defs/string" + }, + "job": { + "$ref": "#/$defs/string" + }, + "metastore": { + "$ref": "#/$defs/string" + }, + "pipeline": { + "$ref": "#/$defs/string" + }, + "query": { + "$ref": "#/$defs/string" + }, + "service_principal": { + "$ref": "#/$defs/string" + }, + "warehouse": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.TargetVariable": { + "anyOf": [ + { + "type": "object", + "properties": { + "default": { + "$ref": "#/$defs/interface" + }, + "description": { + "$ref": "#/$defs/string" + }, + "lookup": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Lookup" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" + } + }, + "additionalProperties": false + }, + {} + ] + }, + "variable.Variable": { + "type": "object", + "properties": { + "default": { + "$ref": "#/$defs/interface" + }, + "description": { + "$ref": "#/$defs/string" + }, + "lookup": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Lookup" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" + } + }, + "additionalProperties": false + }, + "variable.VariableType": { + "type": "string" + } + }, + "config.Artifact": { + "anyOf": [ + { + "type": "object", + "properties": { + "build": { + "$ref": "#/$defs/string" + }, + "executable": { + "$ref": "#/$defs/github.com/databricks/cli/libs/exec.ExecutableType" + }, + "files": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile" + }, + "path": { + "$ref": "#/$defs/string" + }, + "type": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactType" + } + }, + "additionalProperties": false, + "required": [ + "type" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.ArtifactFile": { + "anyOf": [ + { + "type": "object", + "properties": { + "source": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "source" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.ArtifactType": { + "type": "string" + }, + "config.Bundle": { + "anyOf": [ + { + "type": "object", + "properties": { + "compute_id": { + "$ref": "#/$defs/string" + }, + "databricks_cli_version": { + "$ref": "#/$defs/string" + }, + "deployment": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment" + }, + "git": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" + }, + "name": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Command": { + "type": "string" + }, + "config.Deployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "fail_on_active_runs": { + "$ref": "#/$defs/bool" + }, + "lock": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Experimental": { + "anyOf": [ + { + "type": "object", + "properties": { + "pydabs": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs" + }, + "python_wheel_wrapper": { + "$ref": "#/$defs/bool" + }, + "scripts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Command" + }, + "use_legacy_run_as": { + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Git": { + "anyOf": [ + { + "type": "object", + "properties": { + "branch": { + "$ref": "#/$defs/string" + }, + "origin_url": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Lock": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "$ref": "#/$defs/bool" + }, + "force": { + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Mode": { + "type": "string" + }, + "config.Presets": { + "anyOf": [ + { + "type": "object", + "properties": { + "jobs_max_concurrent_runs": { + "$ref": "#/$defs/int" + }, + "name_prefix": { + "$ref": "#/$defs/string" + }, + "pipelines_development": { + "$ref": "#/$defs/bool" + }, + "tags": { + "$ref": "#/$defs/map/string" + }, + "trigger_pause_status": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.PyDABs": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "$ref": "#/$defs/bool" + }, + "import": { + "$ref": "#/$defs/slice/string" + }, + "venv_path": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Resources": { + "anyOf": [ + { + "type": "object", + "properties": { + "experiments": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowExperiment" + }, + "jobs": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Job" + }, + "model_serving_endpoints": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint" + }, + "models": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowModel" + }, + "pipelines": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Pipeline" + }, + "quality_monitors": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.QualityMonitor" + }, + "registered_models": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.RegisteredModel" + }, + "schemas": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Schema" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Sync": { + "anyOf": [ + { + "type": "object", + "properties": { + "exclude": { + "$ref": "#/$defs/slice/string" + }, + "include": { + "$ref": "#/$defs/slice/string" + }, + "paths": { + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Target": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifacts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + }, + "bundle": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle" + }, + "compute_id": { + "$ref": "#/$defs/string" + }, + "default": { + "$ref": "#/$defs/bool" + }, + "git": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" + }, + "mode": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Mode" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "presets": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" + }, + "resources": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "sync": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" + }, + "variables": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable" + }, + "workspace": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Workspace": { + "anyOf": [ + { + "type": "object", + "properties": { + "artifact_path": { + "$ref": "#/$defs/string" + }, + "auth_type": { + "$ref": "#/$defs/string" + }, + "azure_client_id": { + "$ref": "#/$defs/string" + }, + "azure_environment": { + "$ref": "#/$defs/string" + }, + "azure_login_app_id": { + "$ref": "#/$defs/string" + }, + "azure_tenant_id": { + "$ref": "#/$defs/string" + }, + "azure_use_msi": { + "$ref": "#/$defs/bool" + }, + "azure_workspace_resource_id": { + "$ref": "#/$defs/string" + }, + "client_id": { + "$ref": "#/$defs/string" + }, + "file_path": { + "$ref": "#/$defs/string" + }, + "google_service_account": { + "$ref": "#/$defs/string" + }, + "host": { + "$ref": "#/$defs/string" + }, + "profile": { + "$ref": "#/$defs/string" + }, + "root_path": { + "$ref": "#/$defs/string" + }, + "state_path": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "libs": { + "exec.ExecutableType": { + "type": "string" + } + } + }, + "databricks-sdk-go": { + "service": { + "catalog.MonitorCronSchedule": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Read only field that indicates whether a schedule is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedulePauseStatus", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + "quartz_cron_expression": { + "description": "The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html).\n", + "$ref": "#/$defs/string" + }, + "timezone_id": { + "description": "The timezone id (e.g., ``\"PST\"``) in which to evaluate the quartz expression.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "quartz_cron_expression", + "timezone_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorCronSchedulePauseStatus": { + "type": "string" + }, + "catalog.MonitorDataClassificationConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "description": "Whether data classification is enabled.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorDestination": { + "anyOf": [ + { + "type": "object", + "properties": { + "email_addresses": { + "description": "The list of email addresses to send the notification to. A maximum of 5 email addresses is supported.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorInferenceLog": { + "anyOf": [ + { + "type": "object", + "properties": { + "granularities": { + "description": "Granularities for aggregating data into time windows based on their timestamp. Currently the following static\ngranularities are supported:\n{``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}.\n", + "$ref": "#/$defs/slice/string" + }, + "label_col": { + "description": "Optional column that contains the ground truth for the prediction.", + "$ref": "#/$defs/string" + }, + "model_id_col": { + "description": "Column that contains the id of the model generating the predictions. Metrics will be computed per model id by\ndefault, and also across all model ids.\n", + "$ref": "#/$defs/string" + }, + "prediction_col": { + "description": "Column that contains the output/prediction from the model.", + "$ref": "#/$defs/string" + }, + "prediction_proba_col": { + "description": "Optional column that contains the prediction probabilities for each class in a classification problem type.\nThe values in this column should be a map, mapping each class label to the prediction probability for a given\nsample. The map should be of PySpark MapType().\n", + "$ref": "#/$defs/string" + }, + "problem_type": { + "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLogProblemType", + "enum": [ + "PROBLEM_TYPE_CLASSIFICATION", + "PROBLEM_TYPE_REGRESSION" + ] + }, + "timestamp_col": { + "description": "Column that contains the timestamps of requests. The column must be one of the following:\n- A ``TimestampType`` column\n- A column whose values can be converted to timestamps through the pyspark\n ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "granularities", + "model_id_col", + "prediction_col", + "problem_type", + "timestamp_col" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorInferenceLogProblemType": { + "type": "string" + }, + "catalog.MonitorMetric": { + "anyOf": [ + { + "type": "object", + "properties": { + "definition": { + "description": "Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition).", + "$ref": "#/$defs/string" + }, + "input_columns": { + "description": "A list of column names in the input table the metric should be computed for.\nCan use ``\":table\"`` to indicate that the metric needs information from multiple columns.\n", + "$ref": "#/$defs/slice/string" + }, + "name": { + "description": "Name of the metric in the output tables.", + "$ref": "#/$defs/string" + }, + "output_data_type": { + "description": "The output type of the custom metric.", + "$ref": "#/$defs/string" + }, + "type": { + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetricType", + "enum": [ + "CUSTOM_METRIC_TYPE_AGGREGATE", + "CUSTOM_METRIC_TYPE_DERIVED", + "CUSTOM_METRIC_TYPE_DRIFT" + ] + } + }, + "additionalProperties": false, + "required": [ + "definition", + "input_columns", + "name", + "output_data_type", + "type" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorMetricType": { + "type": "string" + }, + "catalog.MonitorNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "on_failure": { + "description": "Who to send notifications to on monitor failure.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination" + }, + "on_new_classification_tag_detected": { + "description": "Who to send notifications to when new data classification tags are detected.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorSnapshot": { + "anyOf": [ + { + "type": "object", + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "catalog.MonitorTimeSeries": { + "anyOf": [ + { + "type": "object", + "properties": { + "granularities": { + "description": "Granularities for aggregating data into time windows based on their timestamp. Currently the following static\ngranularities are supported:\n{``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}.\n", + "$ref": "#/$defs/slice/string" + }, + "timestamp_col": { + "description": "Column that contains the timestamps of requests. The column must be one of the following:\n- A ``TimestampType`` column\n- A column whose values can be converted to timestamps through the pyspark\n ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "granularities", + "timestamp_col" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Adlsgen2Info": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "abfss destination, e.g. `abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AutoScale": { + "anyOf": [ + { + "type": "object", + "properties": { + "max_workers": { + "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.", + "$ref": "#/$defs/int" + }, + "min_workers": { + "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AwsAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAvailability" + }, + "ebs_volume_count": { + "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.", + "$ref": "#/$defs/int" + }, + "ebs_volume_iops": { + "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "$ref": "#/$defs/int" + }, + "ebs_volume_size": { + "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.", + "$ref": "#/$defs/int" + }, + "ebs_volume_throughput": { + "description": "If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "$ref": "#/$defs/int" + }, + "ebs_volume_type": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.EbsVolumeType" + }, + "first_on_demand": { + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "$ref": "#/$defs/int" + }, + "instance_profile_arn": { + "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists.", + "$ref": "#/$defs/string" + }, + "spot_bid_price_percent": { + "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.", + "$ref": "#/$defs/int" + }, + "zone_id": { + "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, a default zone will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AwsAvailability": { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", + "enum": [ + "SPOT", + "ON_DEMAND", + "SPOT_WITH_FALLBACK" + ] + }, + "compute.AzureAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAvailability" + }, + "first_on_demand": { + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "$ref": "#/$defs/int" + }, + "log_analytics_info": { + "description": "Defines values necessary to configure and run Azure Log Analytics agent", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo" + }, + "spot_bid_max_price": { + "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should \u003e 0 or -1.", + "$ref": "#/$defs/float64" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.AzureAvailability": { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", + "enum": [ + "SPOT_AZURE", + "ON_DEMAND_AZURE", + "SPOT_WITH_FALLBACK_AZURE" + ] + }, + "compute.ClientsTypes": { + "anyOf": [ + { + "type": "object", + "properties": { + "jobs": { + "description": "With jobs set, the cluster can be used for jobs", + "$ref": "#/$defs/bool" + }, + "notebooks": { + "description": "With notebooks set, this cluster can be used for notebooks", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.ClusterLogConf": { + "anyOf": [ + { + "type": "object", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo" + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.ClusterSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "apply_policy_default_values": { + "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.", + "$ref": "#/$defs/bool" + }, + "autoscale": { + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AutoScale" + }, + "autotermination_minutes": { + "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.", + "$ref": "#/$defs/int" + }, + "aws_attributes": { + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes" + }, + "azure_attributes": { + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" + }, + "cluster_log_conf": { + "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" + }, + "cluster_name": { + "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n", + "$ref": "#/$defs/string" + }, + "custom_tags": { + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "$ref": "#/$defs/map/string" + }, + "data_security_mode": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode" + }, + "docker_image": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerImage" + }, + "driver_instance_pool_id": { + "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "$ref": "#/$defs/string" + }, + "driver_node_type_id": { + "description": "The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n", + "$ref": "#/$defs/string" + }, + "enable_elastic_disk": { + "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details.", + "$ref": "#/$defs/bool" + }, + "enable_local_disk_encryption": { + "description": "Whether to enable LUKS on cluster VMs' local disks", + "$ref": "#/$defs/bool" + }, + "gcp_attributes": { + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes" + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + }, + "instance_pool_id": { + "description": "The optional ID of the instance pool to which the cluster belongs.", + "$ref": "#/$defs/string" + }, + "node_type_id": { + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n", + "$ref": "#/$defs/string" + }, + "num_workers": { + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "$ref": "#/$defs/int" + }, + "policy_id": { + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "$ref": "#/$defs/string" + }, + "runtime_engine": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine" + }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`", + "$ref": "#/$defs/string" + }, + "spark_conf": { + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", + "$ref": "#/$defs/map/string" + }, + "spark_env_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "$ref": "#/$defs/map/string" + }, + "spark_version": { + "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.\n", + "$ref": "#/$defs/string" + }, + "ssh_public_keys": { + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "$ref": "#/$defs/slice/string" + }, + "workload_type": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DataSecurityMode": { + "type": "string", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", + "enum": [ + "NONE", + "SINGLE_USER", + "USER_ISOLATION", + "LEGACY_TABLE_ACL", + "LEGACY_PASSTHROUGH", + "LEGACY_SINGLE_USER", + "LEGACY_SINGLE_USER_STANDARD" + ] + }, + "compute.DbfsStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DockerBasicAuth": { + "anyOf": [ + { + "type": "object", + "properties": { + "password": { + "description": "Password of the user", + "$ref": "#/$defs/string" + }, + "username": { + "description": "Name of the user", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.DockerImage": { + "anyOf": [ + { + "type": "object", + "properties": { + "basic_auth": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DockerBasicAuth" + }, + "url": { + "description": "URL of the docker image.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.EbsVolumeType": { + "type": "string", + "description": "The type of EBS volumes that will be launched with this cluster.", + "enum": [ + "GENERAL_PURPOSE_SSD", + "THROUGHPUT_OPTIMIZED_HDD" + ] + }, + "compute.Environment": { + "anyOf": [ + { + "type": "object", + "description": "The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task.\nIn this minimal environment spec, only pip dependencies are supported.", + "properties": { + "client": { + "description": "Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version.", + "$ref": "#/$defs/string" + }, + "dependencies": { + "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e\nE.g. dependencies: [\"foo==0.0.1\", \"-r /Workspace/test/requirements.txt\"]", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "client" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.GcpAttributes": { + "anyOf": [ + { + "type": "object", + "properties": { + "availability": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAvailability" + }, + "boot_disk_size": { + "description": "boot disk size in GB", + "$ref": "#/$defs/int" + }, + "google_service_account": { + "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.", + "$ref": "#/$defs/string" + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.", + "$ref": "#/$defs/int" + }, + "use_preemptible_executors": { + "description": "This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead.", + "$ref": "#/$defs/bool" + }, + "zone_id": { + "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.GcpAvailability": { + "type": "string", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "enum": [ + "PREEMPTIBLE_GCP", + "ON_DEMAND_GCP", + "PREEMPTIBLE_WITH_FALLBACK_GCP" + ] + }, + "compute.GcsStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.InitScriptInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "abfss": { + "description": "destination needs to be provided. e.g.\n`{ \"abfss\" : { \"destination\" : \"abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e\" } }", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Adlsgen2Info" + }, + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo" + }, + "file": { + "description": "destination needs to be provided. e.g.\n`{ \"file\" : { \"destination\" : \"file:/my/local/file.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.LocalFileInfo" + }, + "gcs": { + "description": "destination needs to be provided. e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcsStorageInfo" + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo" + }, + "volumes": { + "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo" + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkspaceStorageInfo" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Library": { + "anyOf": [ + { + "type": "object", + "properties": { + "cran": { + "description": "Specification of a CRAN library to be installed as part of the library", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.RCranLibrary" + }, + "egg": { + "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above.", + "$ref": "#/$defs/string" + }, + "jar": { + "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "$ref": "#/$defs/string" + }, + "maven": { + "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary" + }, + "pypi": { + "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.PythonPyPiLibrary" + }, + "requirements": { + "description": "URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`", + "$ref": "#/$defs/string" + }, + "whl": { + "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.LocalFileInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "local file destination, e.g. `file:/my/local/file.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.LogAnalyticsInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "log_analytics_primary_key": { + "description": "\u003cneeds content added\u003e", + "$ref": "#/$defs/string" + }, + "log_analytics_workspace_id": { + "description": "\u003cneeds content added\u003e", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.MavenLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "coordinates": { + "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\".", + "$ref": "#/$defs/string" + }, + "exclusions": { + "description": "List of dependences to exclude. For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", + "$ref": "#/$defs/slice/string" + }, + "repo": { + "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "coordinates" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.PythonPyPiLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "package": { + "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\".", + "$ref": "#/$defs/string" + }, + "repo": { + "description": "The repository where the package can be found. If not specified, the default pip index is\nused.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "package" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.RCranLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "package": { + "description": "The name of the CRAN package to install.", + "$ref": "#/$defs/string" + }, + "repo": { + "description": "The repository where the package can be found. If not specified, the default CRAN repo is used.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "package" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.RuntimeEngine": { + "type": "string", + "description": "Decides which runtime engine to be use, e.g. Standard vs. Photon. If unspecified, the runtime\nengine is inferred from spark_version.", + "enum": [ + "NULL", + "STANDARD", + "PHOTON" + ] + }, + "compute.S3StorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.", + "$ref": "#/$defs/string" + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.", + "$ref": "#/$defs/string" + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default.", + "$ref": "#/$defs/bool" + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`.", + "$ref": "#/$defs/string" + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used.", + "$ref": "#/$defs/string" + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`.", + "$ref": "#/$defs/string" + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.VolumesStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.WorkloadType": { + "anyOf": [ + { + "type": "object", + "properties": { + "clients": { + "description": " defined what type of clients can use the cluster. E.g. Notebooks, Jobs", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClientsTypes" + } + }, + "additionalProperties": false, + "required": [ + "clients" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.WorkspaceStorageInfo": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "destination" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Condition": { + "type": "string", + "enum": [ + "ANY_UPDATED", + "ALL_UPDATED" + ] + }, + "jobs.ConditionTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference.", + "$ref": "#/$defs/string" + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ConditionTaskOp" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "left", + "op", + "right" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.ConditionTaskOp": { + "type": "string", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "enum": [ + "EQUAL_TO", + "GREATER_THAN", + "GREATER_THAN_OR_EQUAL", + "LESS_THAN", + "LESS_THAN_OR_EQUAL", + "NOT_EQUAL" + ] + }, + "jobs.Continuous": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.CronSchedule": { + "anyOf": [ + { + "type": "object", + "properties": { + "pause_status": { + "description": "Indicate whether this schedule is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + }, + "quartz_cron_expression": { + "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.", + "$ref": "#/$defs/string" + }, + "timezone_id": { + "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "quartz_cron_expression", + "timezone_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.DbtTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog": { + "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1.", + "$ref": "#/$defs/string" + }, + "commands": { + "description": "A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", + "$ref": "#/$defs/slice/string" + }, + "profiles_directory": { + "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used.", + "$ref": "#/$defs/string" + }, + "project_directory": { + "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used.", + "$ref": "#/$defs/string" + }, + "schema": { + "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + }, + "warehouse_id": { + "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "commands" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.FileArrivalTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds", + "$ref": "#/$defs/int" + }, + "url": { + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.", + "$ref": "#/$defs/string" + }, + "wait_after_last_change_seconds": { + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false, + "required": [ + "url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.ForEachTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "concurrency": { + "description": "An optional maximum allowed number of concurrent runs of the task.\nSet this value if you want to be able to execute multiple runs of the task concurrently.", + "$ref": "#/$defs/int" + }, + "inputs": { + "description": "Array for task to iterate on. This can be a JSON string or a reference to\nan array parameter.", + "$ref": "#/$defs/string" + }, + "task": { + "description": "Configuration for the task that will be run for each element in the array", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Task" + } + }, + "additionalProperties": false, + "required": [ + "inputs", + "task" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Format": { + "type": "string", + "enum": [ + "SINGLE_TASK", + "MULTI_TASK" + ] + }, + "jobs.GitProvider": { + "type": "string", + "enum": [ + "gitHub", + "bitbucketCloud", + "azureDevOpsServices", + "gitHubEnterprise", + "bitbucketServer", + "gitLab", + "gitLabEnterpriseEdition", + "awsCodeCommit" + ] + }, + "jobs.GitSnapshot": { + "anyOf": [ + { + "type": "object", + "description": "Read-only state of the remote repository at the time the job was run. This field is only included on job runs.", + "properties": { + "used_commit": { + "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.GitSource": { + "anyOf": [ + { + "type": "object", + "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "properties": { + "git_branch": { + "description": "Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit.", + "$ref": "#/$defs/string" + }, + "git_commit": { + "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag.", + "$ref": "#/$defs/string" + }, + "git_provider": { + "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitProvider" + }, + "git_snapshot": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.GitSnapshot" + }, + "git_tag": { + "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit.", + "$ref": "#/$defs/string" + }, + "git_url": { + "description": "URL of the repository to be cloned by this job.", + "$ref": "#/$defs/string" + }, + "job_source": { + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobSource" + } + }, + "additionalProperties": false, + "required": [ + "git_provider", + "git_url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobCluster": { + "anyOf": [ + { + "type": "object", + "properties": { + "job_cluster_key": { + "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.", + "$ref": "#/$defs/string" + }, + "new_cluster": { + "description": "If new_cluster, a description of a cluster that is created for each task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec" + } + }, + "additionalProperties": false, + "required": [ + "job_cluster_key", + "new_cluster" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobDeployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "kind": { + "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobDeploymentKind" + }, + "metadata_file_path": { + "description": "Path of the file that contains deployment metadata.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "kind" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobDeploymentKind": { + "type": "string", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "enum": [ + "BUNDLE" + ] + }, + "jobs.JobEditMode": { + "type": "string", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "enum": [ + "UI_LOCKED", + "EDITABLE" + ] + }, + "jobs.JobEmailNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_skipped_runs": { + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + }, + "on_duration_warning_threshold_exceeded": { + "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_failure": { + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_start": { + "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_streaming_backlog_exceeded": { + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", + "$ref": "#/$defs/slice/string" + }, + "on_success": { + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobEnvironment": { + "anyOf": [ + { + "type": "object", + "properties": { + "environment_key": { + "description": "The key of an environment. It has to be unique within a job.", + "$ref": "#/$defs/string" + }, + "spec": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Environment" + } + }, + "additionalProperties": false, + "required": [ + "environment_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobNotificationSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobParameterDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "default": { + "description": "Default value of the parameter.", + "$ref": "#/$defs/string" + }, + "name": { + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "default", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobRunAs": { + "anyOf": [ + { + "type": "object", + "description": "Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.\n\nOnly `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown.", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "$ref": "#/$defs/string" + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobSource": { + "anyOf": [ + { + "type": "object", + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "properties": { + "dirty_state": { + "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobSourceDirtyState" + }, + "import_from_git_branch": { + "description": "Name of the branch which the job is imported from.", + "$ref": "#/$defs/string" + }, + "job_config_path": { + "description": "Path of the job YAML file that contains the job specification.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "import_from_git_branch", + "job_config_path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobSourceDirtyState": { + "type": "string", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "enum": [ + "NOT_SYNCED", + "DISCONNECTED" + ] + }, + "jobs.JobsHealthMetric": { + "type": "string", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Private Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Private Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Private Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Private Preview.", + "enum": [ + "RUN_DURATION_SECONDS", + "STREAMING_BACKLOG_BYTES", + "STREAMING_BACKLOG_RECORDS", + "STREAMING_BACKLOG_SECONDS", + "STREAMING_BACKLOG_FILES" + ] + }, + "jobs.JobsHealthOperator": { + "type": "string", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "enum": [ + "GREATER_THAN" + ] + }, + "jobs.JobsHealthRule": { + "anyOf": [ + { + "type": "object", + "properties": { + "metric": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric" + }, + "op": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthOperator" + }, + "value": { + "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule.", + "$ref": "#/$defs/int64" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "op", + "value" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobsHealthRules": { + "anyOf": [ + { + "type": "object", + "description": "An optional set of health rules that can be defined for this job.", + "properties": { + "rules": { + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.NotebookTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "base_parameters": { + "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run\nNow with parameters specified, the two parameters maps are merged. If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", + "$ref": "#/$defs/map/string" + }, + "notebook_path": { + "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + }, + "warehouse_id": { + "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "notebook_path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PauseStatus": { + "type": "string", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + "jobs.PeriodicTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "interval": { + "description": "The interval at which the trigger should run.", + "$ref": "#/$defs/int" + }, + "unit": { + "description": "The unit of time for the interval.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfigurationTimeUnit" + } + }, + "additionalProperties": false, + "required": [ + "interval", + "unit" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PeriodicTriggerConfigurationTimeUnit": { + "type": "string", + "enum": [ + "HOURS", + "DAYS", + "WEEKS" + ] + }, + "jobs.PipelineParams": { + "anyOf": [ + { + "type": "object", + "properties": { + "full_refresh": { + "description": "If true, triggers a full refresh on the delta live table.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PipelineTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "full_refresh": { + "description": "If true, triggers a full refresh on the delta live table.", + "$ref": "#/$defs/bool" + }, + "pipeline_id": { + "description": "The full name of the pipeline task to execute.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "pipeline_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PythonWheelTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "entry_point": { + "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`", + "$ref": "#/$defs/string" + }, + "named_parameters": { + "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", + "$ref": "#/$defs/map/string" + }, + "package_name": { + "description": "Name of the package to execute", + "$ref": "#/$defs/string" + }, + "parameters": { + "description": "Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false, + "required": [ + "entry_point", + "package_name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.QueueSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "enabled": { + "description": "If true, enable queueing for the job. This is a required field.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false, + "required": [ + "enabled" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.RunIf": { + "type": "string", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", + "enum": [ + "ALL_SUCCESS", + "ALL_DONE", + "NONE_FAILED", + "AT_LEAST_ONE_SUCCESS", + "ALL_FAILED", + "AT_LEAST_ONE_FAILED" + ] + }, + "jobs.RunJobTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "dbt_commands": { + "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`", + "$ref": "#/$defs/slice/string" + }, + "jar_params": { + "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\\"#parameter-variables\\\") to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + }, + "job_id": { + "description": "ID of the job to trigger.", + "$ref": "#/$defs/int64" + }, + "job_parameters": { + "description": "Job-level parameters used to trigger the job.", + "$ref": "#/$defs/map/string" + }, + "notebook_params": { + "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", + "$ref": "#/$defs/map/string" + }, + "pipeline_params": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PipelineParams" + }, + "python_named_params": { + "$ref": "#/$defs/map/string" + }, + "python_params": { + "description": "A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string" + }, + "spark_submit_params": { + "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "$ref": "#/$defs/slice/string" + }, + "sql_params": { + "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. The SQL alert task does not support custom parameters.", + "$ref": "#/$defs/map/string" + } + }, + "additionalProperties": false, + "required": [ + "job_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Source": { + "type": "string", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "enum": [ + "WORKSPACE", + "GIT" + ] + }, + "jobs.SparkJarTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "jar_uri": { + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.", + "$ref": "#/$defs/string" + }, + "main_class_name": { + "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail.", + "$ref": "#/$defs/string" + }, + "parameters": { + "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SparkPythonTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "parameters": { + "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + }, + "python_file": { + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + } + }, + "additionalProperties": false, + "required": [ + "python_file" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SparkSubmitTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "parameters": { + "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTask": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert": { + "description": "If alert, indicates that this job must refresh a SQL alert.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskAlert" + }, + "dashboard": { + "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskDashboard" + }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskFile" + }, + "parameters": { + "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", + "$ref": "#/$defs/map/string" + }, + "query": { + "description": "If query, indicates that this job must execute a SQL query.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskQuery" + }, + "warehouse_id": { + "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "warehouse_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskAlert": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert_id": { + "description": "The canonical identifier of the SQL alert.", + "$ref": "#/$defs/string" + }, + "pause_subscriptions": { + "description": "If true, the alert notifications are not sent to subscribers.", + "$ref": "#/$defs/bool" + }, + "subscriptions": { + "description": "If specified, alert notifications are sent to subscribers.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + "additionalProperties": false, + "required": [ + "alert_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskDashboard": { + "anyOf": [ + { + "type": "object", + "properties": { + "custom_subject": { + "description": "Subject of the email sent to subscribers of this task.", + "$ref": "#/$defs/string" + }, + "dashboard_id": { + "description": "The canonical identifier of the SQL dashboard.", + "$ref": "#/$defs/string" + }, + "pause_subscriptions": { + "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.", + "$ref": "#/$defs/bool" + }, + "subscriptions": { + "description": "If specified, dashboard snapshots are sent to subscriptions.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + "additionalProperties": false, + "required": [ + "dashboard_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskFile": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths.", + "$ref": "#/$defs/string" + }, + "source": { + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Source" + } + }, + "additionalProperties": false, + "required": [ + "path" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskQuery": { + "anyOf": [ + { + "type": "object", + "properties": { + "query_id": { + "description": "The canonical identifier of the SQL query.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "query_id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskSubscription": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_id": { + "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications.", + "$ref": "#/$defs/string" + }, + "user_name": { + "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TableUpdateTriggerConfiguration": { + "anyOf": [ + { + "type": "object", + "properties": { + "condition": { + "description": "The table(s) condition based on which to trigger a job run.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Condition" + }, + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + }, + "table_names": { + "description": "A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", + "$ref": "#/$defs/slice/string" + }, + "wait_after_last_change_seconds": { + "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Task": { + "anyOf": [ + { + "type": "object", + "properties": { + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ConditionTask" + }, + "dbt_task": { + "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.DbtTask" + }, + "depends_on": { + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency" + }, + "description": { + "description": "An optional description for this task.", + "$ref": "#/$defs/string" + }, + "disable_auto_optimization": { + "description": "An option to disable auto optimization in serverless", + "$ref": "#/$defs/bool" + }, + "email_notifications": { + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskEmailNotifications" + }, + "environment_key": { + "description": "The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute.", + "$ref": "#/$defs/string" + }, + "existing_cluster_id": { + "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability", + "$ref": "#/$defs/string" + }, + "for_each_task": { + "description": "If for_each_task, indicates that this task must execute the nested task within it.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.ForEachTask" + }, + "health": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" + }, + "job_cluster_key": { + "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`.", + "$ref": "#/$defs/string" + }, + "libraries": { + "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.Library" + }, + "max_retries": { + "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry.", + "$ref": "#/$defs/int" + }, + "min_retry_interval_millis": { + "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried.", + "$ref": "#/$defs/int" + }, + "new_cluster": { + "description": "If new_cluster, a description of a new cluster that is created for each run.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec" + }, + "notebook_task": { + "description": "If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.NotebookTask" + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskNotificationSettings" + }, + "pipeline_task": { + "description": "If pipeline_task, indicates that this task must execute a Pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PipelineTask" + }, + "python_wheel_task": { + "description": "If python_wheel_task, indicates that this job must execute a PythonWheel.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PythonWheelTask" + }, + "retry_on_timeout": { + "description": "An optional policy to specify whether to retry a job when it times out. The default behavior\nis to not retry on timeout.", + "$ref": "#/$defs/bool" + }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.RunIf" + }, + "run_job_task": { + "description": "If run_job_task, indicates that this task must execute another job.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask" + }, + "spark_jar_task": { + "description": "If spark_jar_task, indicates that this task must run a JAR.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask" + }, + "spark_python_task": { + "description": "If spark_python_task, indicates that this task must run a Python file.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask" + }, + "spark_submit_task": { + "description": "If `spark_submit_task`, indicates that this task must be launched by the spark submit script. This task can run only on new clusters.\n\nIn the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.\n\n`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.\n\nBy default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage.\n\nThe `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SparkSubmitTask" + }, + "sql_task": { + "description": "If sql_task, indicates that this job must execute a SQL task.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTask" + }, + "task_key": { + "description": "A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset.", + "$ref": "#/$defs/string" + }, + "timeout_seconds": { + "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout.", + "$ref": "#/$defs/int" + }, + "webhook_notifications": { + "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" + } + }, + "additionalProperties": false, + "required": [ + "task_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskDependency": { + "anyOf": [ + { + "type": "object", + "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run.", + "$ref": "#/$defs/string" + }, + "task_key": { + "description": "The name of the task this task depends on.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "task_key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskEmailNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "no_alert_for_skipped_runs": { + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + }, + "on_duration_warning_threshold_exceeded": { + "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_failure": { + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_start": { + "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + }, + "on_streaming_backlog_exceeded": { + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.", + "$ref": "#/$defs/slice/string" + }, + "on_success": { + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskNotificationSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "$ref": "#/$defs/bool" + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "$ref": "#/$defs/bool" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TriggerSettings": { + "anyOf": [ + { + "type": "object", + "properties": { + "file_arrival": { + "description": "File arrival trigger settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.FileArrivalTriggerConfiguration" + }, + "pause_status": { + "description": "Whether this trigger is paused or not.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus" + }, + "periodic": { + "description": "Periodic trigger settings.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfiguration" + }, + "table": { + "description": "Old table trigger settings name. Deprecated in favor of `table_update`.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration" + }, + "table_update": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Webhook": { + "anyOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "id" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.WebhookNotifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "on_duration_warning_threshold_exceeded": { + "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_failure": { + "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_start": { + "description": "An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_streaming_backlog_exceeded": { + "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + }, + "on_success": { + "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ExperimentTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersion": { + "anyOf": [ + { + "type": "object", + "properties": { + "creation_timestamp": { + "description": "Timestamp recorded when this `model_version` was created.", + "$ref": "#/$defs/int64" + }, + "current_stage": { + "description": "Current stage for this `model_version`.", + "$ref": "#/$defs/string" + }, + "description": { + "description": "Description of this `model_version`.", + "$ref": "#/$defs/string" + }, + "last_updated_timestamp": { + "description": "Timestamp recorded when metadata for this `model_version` was last updated.", + "$ref": "#/$defs/int64" + }, + "name": { + "description": "Unique name of the model", + "$ref": "#/$defs/string" + }, + "run_id": { + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server.", + "$ref": "#/$defs/string" + }, + "run_link": { + "description": "Run Link: Direct link to the run that generated this version", + "$ref": "#/$defs/string" + }, + "source": { + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`", + "$ref": "#/$defs/string" + }, + "status": { + "description": "Current status of `model_version`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionStatus", + "enum": [ + "PENDING_REGISTRATION", + "FAILED_REGISTRATION", + "READY" + ] + }, + "status_message": { + "description": "Details on current `status`, if it is pending or failed.", + "$ref": "#/$defs/string" + }, + "tags": { + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag" + }, + "user_id": { + "description": "User that created this `model_version`.", + "$ref": "#/$defs/string" + }, + "version": { + "description": "Model's version number.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersionStatus": { + "type": "string" + }, + "ml.ModelVersionTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "The tag key.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "The tag value.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.CronTrigger": { + "anyOf": [ + { + "type": "object", + "properties": { + "quartz_cron_schedule": { + "$ref": "#/$defs/string" + }, + "timezone_id": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.DeploymentKind": { + "type": "string", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", + "enum": [ + "BUNDLE" + ] + }, + "pipelines.FileLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "The absolute path of the file.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Filters": { + "anyOf": [ + { + "type": "object", + "properties": { + "exclude": { + "description": "Paths to exclude.", + "$ref": "#/$defs/slice/string" + }, + "include": { + "description": "Paths to include.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "schema": { + "description": "Select tables from a specific source schema.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec" + }, + "table": { + "description": "Select tables from a specific source table.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpec" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionGatewayPipelineDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "connection_id": { + "description": "Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source.", + "$ref": "#/$defs/string" + }, + "gateway_storage_catalog": { + "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location.", + "$ref": "#/$defs/string" + }, + "gateway_storage_name": { + "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n", + "$ref": "#/$defs/string" + }, + "gateway_storage_schema": { + "description": "Required, Immutable. The name of the schema for the gateway pipelines's storage location.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionPipelineDefinition": { + "anyOf": [ + { + "type": "object", + "properties": { + "connection_name": { + "description": "Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name.", + "$ref": "#/$defs/string" + }, + "ingestion_gateway_id": { + "description": "Immutable. Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name.", + "$ref": "#/$defs/string" + }, + "objects": { + "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.ManualTrigger": { + "anyOf": [ + { + "type": "object", + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.NotebookLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "path": { + "description": "The absolute path of the notebook.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Notifications": { + "anyOf": [ + { + "type": "object", + "properties": { + "alerts": { + "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.\n", + "$ref": "#/$defs/slice/string" + }, + "email_recipients": { + "description": "A list of email addresses notified when a configured alert is triggered.\n", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineCluster": { + "anyOf": [ + { + "type": "object", + "properties": { + "apply_policy_default_values": { + "description": "Note: This field won't be persisted. Only API users will check this field.", + "$ref": "#/$defs/bool" + }, + "autoscale": { + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscale" + }, + "aws_attributes": { + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes" + }, + "azure_attributes": { + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" + }, + "cluster_log_conf": { + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" + }, + "custom_tags": { + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "$ref": "#/$defs/map/string" + }, + "driver_instance_pool_id": { + "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "$ref": "#/$defs/string" + }, + "driver_node_type_id": { + "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.", + "$ref": "#/$defs/string" + }, + "enable_local_disk_encryption": { + "description": "Whether to enable local disk encryption for the cluster.", + "$ref": "#/$defs/bool" + }, + "gcp_attributes": { + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes" + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + }, + "instance_pool_id": { + "description": "The optional ID of the instance pool to which the cluster belongs.", + "$ref": "#/$defs/string" + }, + "label": { + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`.", + "$ref": "#/$defs/string" + }, + "node_type_id": { + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n", + "$ref": "#/$defs/string" + }, + "num_workers": { + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "$ref": "#/$defs/int" + }, + "policy_id": { + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "$ref": "#/$defs/string" + }, + "spark_conf": { + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.\n", + "$ref": "#/$defs/map/string" + }, + "spark_env_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "$ref": "#/$defs/map/string" + }, + "ssh_public_keys": { + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "$ref": "#/$defs/slice/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineClusterAutoscale": { + "anyOf": [ + { + "type": "object", + "properties": { + "max_workers": { + "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`.", + "$ref": "#/$defs/int" + }, + "min_workers": { + "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "$ref": "#/$defs/int" + }, + "mode": { + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscaleMode", + "enum": [ + "ENHANCED", + "LEGACY" + ] + } + }, + "additionalProperties": false, + "required": [ + "max_workers", + "min_workers" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineClusterAutoscaleMode": { + "type": "string" + }, + "pipelines.PipelineDeployment": { + "anyOf": [ + { + "type": "object", + "properties": { + "kind": { + "description": "The deployment method that manages the pipeline.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind" + }, + "metadata_file_path": { + "description": "The path to the file containing metadata about the deployment.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineLibrary": { + "anyOf": [ + { + "type": "object", + "properties": { + "file": { + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.FileLibrary" + }, + "jar": { + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n", + "$ref": "#/$defs/string" + }, + "maven": { + "description": "Specification of a maven library to be installed.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary" + }, + "notebook": { + "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.NotebookLibrary" + }, + "whl": { + "description": "URI of the whl to be installed.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineTrigger": { + "anyOf": [ + { + "type": "object", + "properties": { + "cron": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger" + }, + "manual": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.ManualTrigger" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.SchemaSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store tables.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists.", + "$ref": "#/$defs/string" + }, + "source_catalog": { + "description": "The source catalog name. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_schema": { + "description": "Required. Schema name in the source database.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpec": { + "anyOf": [ + { + "type": "object", + "properties": { + "destination_catalog": { + "description": "Required. Destination catalog to store table.", + "$ref": "#/$defs/string" + }, + "destination_schema": { + "description": "Required. Destination schema to store table.", + "$ref": "#/$defs/string" + }, + "destination_table": { + "description": "Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.", + "$ref": "#/$defs/string" + }, + "source_catalog": { + "description": "Source catalog name. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_schema": { + "description": "Schema name in the source database. Might be optional depending on the type of source.", + "$ref": "#/$defs/string" + }, + "source_table": { + "description": "Required. Table name in the source database.", + "$ref": "#/$defs/string" + }, + "table_configuration": { + "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpecificConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "primary_keys": { + "description": "The primary key of the table used to apply changes.", + "$ref": "#/$defs/slice/string" + }, + "salesforce_include_formula_fields": { + "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector", + "$ref": "#/$defs/bool" + }, + "scd_type": { + "description": "The SCD type to use to ingest the table.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScdType", + "enum": [ + "SCD_TYPE_1", + "SCD_TYPE_2" + ] + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.TableSpecificConfigScdType": { + "type": "string" + }, + "serving.Ai21LabsConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "ai21labs_api_key": { + "$ref": "#/$defs/string" + }, + "ai21labs_api_key_plaintext": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AmazonBedrockConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "aws_access_key_id": { + "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_access_key_id_plaintext": { + "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_region": { + "description": "The AWS region to use. Bedrock has to be enabled there.", + "$ref": "#/$defs/string" + }, + "aws_secret_access_key": { + "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "aws_secret_access_key_plaintext": { + "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "bedrock_provider": { + "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider", + "enum": [ + "anthropic", + "cohere", + "ai21labs", + "amazon" + ] + } + }, + "additionalProperties": false, + "required": [ + "aws_region", + "bedrock_provider" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AmazonBedrockConfigBedrockProvider": { + "type": "string" + }, + "serving.AnthropicConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "anthropic_api_key": { + "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "anthropic_api_key_plaintext": { + "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.AutoCaptureConfigInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "catalog_name": { + "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled.", + "$ref": "#/$defs/string" + }, + "enabled": { + "description": "Indicates whether the inference table is enabled.", + "$ref": "#/$defs/bool" + }, + "schema_name": { + "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled.", + "$ref": "#/$defs/string" + }, + "table_name_prefix": { + "description": "The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.CohereConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "cohere_api_base": { + "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n", + "$ref": "#/$defs/string" + }, + "cohere_api_key": { + "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "$ref": "#/$defs/string" + }, + "cohere_api_key_plaintext": { + "description": "The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.DatabricksModelServingConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "databricks_api_token": { + "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "$ref": "#/$defs/string" + }, + "databricks_api_token_plaintext": { + "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "$ref": "#/$defs/string" + }, + "databricks_workspace_url": { + "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "databricks_workspace_url" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointCoreConfigInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "auto_capture_config": { + "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput" + }, + "served_entities": { + "description": "A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput" + }, + "served_models": { + "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput" + }, + "traffic_config": { + "description": "The traffic config defining how invocations to the serving endpoint should be routed.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.TrafficConfig" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointTag": { + "anyOf": [ + { + "type": "object", + "properties": { + "key": { + "description": "Key field for a serving endpoint tag.", + "$ref": "#/$defs/string" + }, + "value": { + "description": "Optional value field for a serving endpoint tag.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "key" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ExternalModel": { + "anyOf": [ + { + "type": "object", + "properties": { + "ai21labs_config": { + "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig" + }, + "amazon_bedrock_config": { + "description": "Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig" + }, + "anthropic_config": { + "description": "Anthropic Config. Only required if the provider is 'anthropic'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig" + }, + "cohere_config": { + "description": "Cohere Config. Only required if the provider is 'cohere'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.CohereConfig" + }, + "databricks_model_serving_config": { + "description": "Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig" + }, + "google_cloud_vertex_ai_config": { + "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig" + }, + "name": { + "description": "The name of the external model.", + "$ref": "#/$defs/string" + }, + "openai_config": { + "description": "OpenAI Config. Only required if the provider is 'openai'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig" + }, + "palm_config": { + "description": "PaLM Config. Only required if the provider is 'palm'.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig" + }, + "provider": { + "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider", + "enum": [ + "ai21labs", + "anthropic", + "amazon-bedrock", + "cohere", + "databricks-model-serving", + "google-cloud-vertex-ai", + "openai", + "palm" + ] + }, + "task": { + "description": "The task type of the external model.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "name", + "provider", + "task" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ExternalModelProvider": { + "type": "string" + }, + "serving.GoogleCloudVertexAiConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "private_key": { + "$ref": "#/$defs/string" + }, + "private_key_plaintext": { + "$ref": "#/$defs/string" + }, + "project_id": { + "$ref": "#/$defs/string" + }, + "region": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.OpenAiConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "microsoft_entra_client_id": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_client_secret": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_client_secret_plaintext": { + "$ref": "#/$defs/string" + }, + "microsoft_entra_tenant_id": { + "$ref": "#/$defs/string" + }, + "openai_api_base": { + "$ref": "#/$defs/string" + }, + "openai_api_key": { + "$ref": "#/$defs/string" + }, + "openai_api_key_plaintext": { + "$ref": "#/$defs/string" + }, + "openai_api_type": { + "$ref": "#/$defs/string" + }, + "openai_api_version": { + "$ref": "#/$defs/string" + }, + "openai_deployment_name": { + "$ref": "#/$defs/string" + }, + "openai_organization": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.PaLmConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "palm_api_key": { + "$ref": "#/$defs/string" + }, + "palm_api_key_plaintext": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimit": { + "anyOf": [ + { + "type": "object", + "properties": { + "calls": { + "description": "Used to specify how many calls are allowed for a key within the renewal_period.", + "$ref": "#/$defs/int" + }, + "key": { + "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey", + "enum": [ + "user", + "endpoint" + ] + }, + "renewal_period": { + "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod", + "enum": [ + "minute" + ] + } + }, + "additionalProperties": false, + "required": [ + "calls", + "renewal_period" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimitKey": { + "type": "string" + }, + "serving.RateLimitRenewalPeriod": { + "type": "string" + }, + "serving.Route": { + "anyOf": [ + { + "type": "object", + "properties": { + "served_model_name": { + "description": "The name of the served model this route configures traffic for.", + "$ref": "#/$defs/string" + }, + "traffic_percentage": { + "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.", + "$ref": "#/$defs/int" + } + }, + "additionalProperties": false, + "required": [ + "served_model_name", + "traffic_percentage" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedEntityInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "entity_name": { + "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n", + "$ref": "#/$defs/string" + }, + "entity_version": { + "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.", + "$ref": "#/$defs/string" + }, + "environment_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "$ref": "#/$defs/map/string" + }, + "external_model": { + "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModel" + }, + "instance_profile_arn": { + "description": "ARN of the instance profile that the served entity uses to access AWS resources.", + "$ref": "#/$defs/string" + }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to.", + "$ref": "#/$defs/int" + }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to.", + "$ref": "#/$defs/int" + }, + "name": { + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n", + "$ref": "#/$defs/string" + }, + "scale_to_zero_enabled": { + "description": "Whether the compute resources for the served entity should scale down to zero.", + "$ref": "#/$defs/bool" + }, + "workload_size": { + "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n", + "$ref": "#/$defs/string" + }, + "workload_type": { + "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInput": { + "anyOf": [ + { + "type": "object", + "properties": { + "environment_vars": { + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "$ref": "#/$defs/map/string" + }, + "instance_profile_arn": { + "description": "ARN of the instance profile that the served model will use to access AWS resources.", + "$ref": "#/$defs/string" + }, + "max_provisioned_throughput": { + "description": "The maximum tokens per second that the endpoint can scale up to.", + "$ref": "#/$defs/int" + }, + "min_provisioned_throughput": { + "description": "The minimum tokens per second that the endpoint can scale down to.", + "$ref": "#/$defs/int" + }, + "model_name": { + "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n", + "$ref": "#/$defs/string" + }, + "model_version": { + "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served.", + "$ref": "#/$defs/string" + }, + "name": { + "description": "The name of a served model. It must be unique across an endpoint. If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n", + "$ref": "#/$defs/string" + }, + "scale_to_zero_enabled": { + "description": "Whether the compute resources for the served model should scale down to zero.", + "$ref": "#/$defs/bool" + }, + "workload_size": { + "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize", + "enum": [ + "Small", + "Medium", + "Large" + ] + }, + "workload_type": { + "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType", + "enum": [ + "CPU", + "GPU_SMALL", + "GPU_MEDIUM", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + } + }, + "additionalProperties": false, + "required": [ + "model_name", + "model_version", + "scale_to_zero_enabled" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInputWorkloadSize": { + "type": "string" + }, + "serving.ServedModelInputWorkloadType": { + "type": "string" + }, + "serving.TrafficConfig": { + "anyOf": [ + { + "type": "object", + "properties": { + "routes": { + "description": "The list of routes that define traffic to each served entity.", + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.Route" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "int": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "int64": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string", + "pattern": "\\$\\{(resources(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(bundle(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(workspace(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(artifacts(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "interface": {}, + "map": { + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Job": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Job" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowExperiment": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.MlflowExperiment" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.MlflowModel": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.MlflowModel" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.ModelServingEndpoint": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Pipeline": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Pipeline" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.QualityMonitor": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.QualityMonitor" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.RegisteredModel": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.RegisteredModel" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Schema": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Schema" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.TargetVariable": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.TargetVariable" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "variable.Variable": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Variable" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "config.Artifact": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Artifact" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Command": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Command" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "config.Target": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Target" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "string": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/string" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "slice": { + "github.com": { + "databricks": { + "cli": { + "bundle": { + "config": { + "resources.Grant": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Grant" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "resources.Permission": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.Permission" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "config.ArtifactFile": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactFile" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + }, + "databricks-sdk-go": { + "service": { + "catalog.MonitorMetric": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.InitScriptInfo": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "compute.Library": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.Library" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobCluster": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobEnvironment": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobParameterDefinition": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.JobsHealthRule": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.SqlTaskSubscription": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Task": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Task" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.TaskDependency": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.Webhook": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.Webhook" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ExperimentTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersion": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersion" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "ml.ModelVersionTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.IngestionConfig": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.Notifications": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.Notifications" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineCluster": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "pipelines.PipelineLibrary": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.EndpointTag": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.EndpointTag" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.RateLimit": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.RateLimit" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.Route": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.Route" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedEntityInput": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServedModelInput": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + } + } + } + }, + "string": { + "anyOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/string" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + } + }, + "string": { + "type": "string" + } + }, + "type": "object", + "properties": { + "artifacts": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + }, + "bundle": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle" + }, + "experimental": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Experimental" + }, + "include": { + "$ref": "#/$defs/slice/string" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "presets": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" + }, + "resources": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" + }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + }, + "sync": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" + }, + "targets": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target" + }, + "variables": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.Variable" + }, + "workspace": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/bundle/schema/openapi.go b/bundle/schema/openapi.go deleted file mode 100644 index 0d896b87c..000000000 --- a/bundle/schema/openapi.go +++ /dev/null @@ -1,293 +0,0 @@ -package schema - -import ( - "encoding/json" - "fmt" - "strings" - - "github.com/databricks/cli/libs/jsonschema" -) - -type OpenapiReader struct { - // OpenAPI spec to read schemas from. - OpenapiSpec *Specification - - // In-memory cache of schemas read from the OpenAPI spec. - memo map[string]jsonschema.Schema -} - -const SchemaPathPrefix = "#/components/schemas/" - -// Read a schema directly from the OpenAPI spec. -func (reader *OpenapiReader) readOpenapiSchema(path string) (jsonschema.Schema, error) { - schemaKey := strings.TrimPrefix(path, SchemaPathPrefix) - - // return early if we already have a computed schema - memoSchema, ok := reader.memo[schemaKey] - if ok { - return memoSchema, nil - } - - // check path is present in openapi spec - openapiSchema, ok := reader.OpenapiSpec.Components.Schemas[schemaKey] - if !ok { - return jsonschema.Schema{}, fmt.Errorf("schema with path %s not found in openapi spec", path) - } - - // convert openapi schema to the native schema struct - bytes, err := json.Marshal(*openapiSchema) - if err != nil { - return jsonschema.Schema{}, err - } - jsonSchema := jsonschema.Schema{} - err = json.Unmarshal(bytes, &jsonSchema) - if err != nil { - return jsonschema.Schema{}, err - } - - // A hack to convert a map[string]interface{} to *Schema - // We rely on the type of a AdditionalProperties in downstream functions - // to do reference interpolation - _, ok = jsonSchema.AdditionalProperties.(map[string]interface{}) - if ok { - b, err := json.Marshal(jsonSchema.AdditionalProperties) - if err != nil { - return jsonschema.Schema{}, err - } - additionalProperties := &jsonschema.Schema{} - err = json.Unmarshal(b, additionalProperties) - if err != nil { - return jsonschema.Schema{}, err - } - jsonSchema.AdditionalProperties = additionalProperties - } - - // store read schema into memo - reader.memo[schemaKey] = jsonSchema - - return jsonSchema, nil -} - -// Resolve all nested "$ref" references in the schema. This function unrolls a single -// level of "$ref" in the schema and calls into traverseSchema to resolve nested references. -// Thus this function and traverseSchema are mutually recursive. -// -// This function is safe against reference loops. If a reference loop is detected, an error -// is returned. -func (reader *OpenapiReader) safeResolveRefs(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) { - if root.Reference == nil { - return reader.traverseSchema(root, tracker) - } - key := *root.Reference - - // HACK to unblock CLI release (13th Feb 2024). This is temporary until proper - // support for recursive types is added to the docs generator. PR: https://github.com/databricks/cli/pull/1204 - if strings.Contains(key, "ForEachTask") { - return root, nil - } - - if tracker.hasCycle(key) { - // self reference loops can be supported however the logic is non-trivial because - // cross refernce loops are not allowed (see: http://json-schema.org/understanding-json-schema/structuring.html#recursion) - return nil, fmt.Errorf("references loop detected") - } - ref := *root.Reference - description := root.Description - tracker.push(ref, ref) - - // Mark reference nil, so we do not traverse this again. This is tracked - // in the memo - root.Reference = nil - - // unroll one level of reference. - selfRef, err := reader.readOpenapiSchema(ref) - if err != nil { - return nil, err - } - root = &selfRef - root.Description = description - - // traverse again to find new references - root, err = reader.traverseSchema(root, tracker) - if err != nil { - return nil, err - } - tracker.pop(ref) - return root, err -} - -// Traverse the nested properties of the schema to resolve "$ref" references. This function -// and safeResolveRefs are mutually recursive. -func (reader *OpenapiReader) traverseSchema(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) { - // case primitive (or invalid) - if root.Type != jsonschema.ObjectType && root.Type != jsonschema.ArrayType { - return root, nil - } - // only root references are resolved - if root.Reference != nil { - return reader.safeResolveRefs(root, tracker) - } - // case struct - if len(root.Properties) > 0 { - for k, v := range root.Properties { - childSchema, err := reader.safeResolveRefs(v, tracker) - if err != nil { - return nil, err - } - root.Properties[k] = childSchema - } - } - // case array - if root.Items != nil { - itemsSchema, err := reader.safeResolveRefs(root.Items, tracker) - if err != nil { - return nil, err - } - root.Items = itemsSchema - } - // case map - additionalProperties, ok := root.AdditionalProperties.(*jsonschema.Schema) - if ok && additionalProperties != nil { - valueSchema, err := reader.safeResolveRefs(additionalProperties, tracker) - if err != nil { - return nil, err - } - root.AdditionalProperties = valueSchema - } - return root, nil -} - -func (reader *OpenapiReader) readResolvedSchema(path string) (*jsonschema.Schema, error) { - root, err := reader.readOpenapiSchema(path) - if err != nil { - return nil, err - } - tracker := newTracker() - tracker.push(path, path) - resolvedRoot, err := reader.safeResolveRefs(&root, tracker) - if err != nil { - return nil, tracker.errWithTrace(err.Error(), "") - } - return resolvedRoot, nil -} - -func (reader *OpenapiReader) jobsDocs() (*Docs, error) { - jobSettingsSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "jobs.JobSettings") - if err != nil { - return nil, err - } - jobDocs := schemaToDocs(jobSettingsSchema) - // TODO: add description for id if needed. - // Tracked in https://github.com/databricks/cli/issues/242 - jobsDocs := &Docs{ - Description: "List of Databricks jobs", - AdditionalProperties: jobDocs, - } - return jobsDocs, nil -} - -func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) { - pipelineSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "pipelines.PipelineSpec") - if err != nil { - return nil, err - } - pipelineDocs := schemaToDocs(pipelineSpecSchema) - // TODO: Two fields in resources.Pipeline have the json tag id. Clarify the - // semantics and then add a description if needed. (https://github.com/databricks/cli/issues/242) - pipelinesDocs := &Docs{ - Description: "List of DLT pipelines", - AdditionalProperties: pipelineDocs, - } - return pipelinesDocs, nil -} - -func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { - experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment") - if err != nil { - return nil, err - } - experimentDocs := schemaToDocs(experimentSpecSchema) - experimentsDocs := &Docs{ - Description: "List of MLflow experiments", - AdditionalProperties: experimentDocs, - } - return experimentsDocs, nil -} - -func (reader *OpenapiReader) modelsDocs() (*Docs, error) { - modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model") - if err != nil { - return nil, err - } - modelDocs := schemaToDocs(modelSpecSchema) - modelsDocs := &Docs{ - Description: "List of MLflow models", - AdditionalProperties: modelDocs, - } - return modelsDocs, nil -} - -func (reader *OpenapiReader) modelServingEndpointsDocs() (*Docs, error) { - modelServingEndpointsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "serving.CreateServingEndpoint") - if err != nil { - return nil, err - } - modelServingEndpointsDocs := schemaToDocs(modelServingEndpointsSpecSchema) - modelServingEndpointsAllDocs := &Docs{ - Description: "List of Model Serving Endpoints", - AdditionalProperties: modelServingEndpointsDocs, - } - return modelServingEndpointsAllDocs, nil -} - -func (reader *OpenapiReader) registeredModelDocs() (*Docs, error) { - registeredModelsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "catalog.CreateRegisteredModelRequest") - if err != nil { - return nil, err - } - registeredModelsDocs := schemaToDocs(registeredModelsSpecSchema) - registeredModelsAllDocs := &Docs{ - Description: "List of Registered Models", - AdditionalProperties: registeredModelsDocs, - } - return registeredModelsAllDocs, nil -} - -func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { - jobsDocs, err := reader.jobsDocs() - if err != nil { - return nil, err - } - pipelinesDocs, err := reader.pipelinesDocs() - if err != nil { - return nil, err - } - experimentsDocs, err := reader.experimentsDocs() - if err != nil { - return nil, err - } - modelsDocs, err := reader.modelsDocs() - if err != nil { - return nil, err - } - modelServingEndpointsDocs, err := reader.modelServingEndpointsDocs() - if err != nil { - return nil, err - } - registeredModelsDocs, err := reader.registeredModelDocs() - if err != nil { - return nil, err - } - - return &Docs{ - Description: "Collection of Databricks resources to deploy.", - Properties: map[string]*Docs{ - "jobs": jobsDocs, - "pipelines": pipelinesDocs, - "experiments": experimentsDocs, - "models": modelsDocs, - "model_serving_endpoints": modelServingEndpointsDocs, - "registered_models": registeredModelsDocs, - }, - }, nil -} diff --git a/bundle/schema/openapi_test.go b/bundle/schema/openapi_test.go deleted file mode 100644 index 4d393cf37..000000000 --- a/bundle/schema/openapi_test.go +++ /dev/null @@ -1,493 +0,0 @@ -package schema - -import ( - "encoding/json" - "testing" - - "github.com/databricks/cli/libs/jsonschema" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestReadSchemaForObject(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mango": { - "type": "object", - "description": "a mango for my schema", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "properties": { - "foo": { - "$ref": "#/components/schemas/foo" - } - } - } - } - } - } - ` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mango": { - "type": "object", - "description": "a mango for my schema", - "properties": { - "foo": { - "type": "number" - } - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReadSchemaForArray(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "fruits": { - "type": "object", - "description": "fruits that are cool", - "items": { - "description": "some papayas, because papayas are fruits too", - "$ref": "#/components/schemas/papaya" - } - }, - "papaya": { - "type": "number" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "items": { - "type": "number", - "description": "some papayas, because papayas are fruits too" - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReadSchemaForMap(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "fruits": { - "type": "object", - "description": "fruits that are meh", - "additionalProperties": { - "description": "watermelons. watermelons.", - "$ref": "#/components/schemas/watermelon" - } - }, - "watermelon": { - "type": "number" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are meh", - "additionalProperties": { - "type": "number", - "description": "watermelons. watermelons." - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestRootReferenceIsResolved(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "abc": { - "type": "string" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - schema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - fruitsSchemaJson, err := json.MarshalIndent(schema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "foo fighters fighting fruits", - "properties": { - "abc": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestSelfReferenceLoopErrors(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "bar": { - "type": "object", - "$ref": "#/components/schemas/foo" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - _, err = reader.readResolvedSchema("#/components/schemas/fruits") - assert.ErrorContains(t, err, "references loop detected. traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo") -} - -func TestCrossReferenceLoopErrors(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "object", - "description": "this description is ignored", - "properties": { - "bar": { - "type": "object", - "$ref": "#/components/schemas/fruits" - } - } - }, - "fruits": { - "type": "object", - "description": "foo fighters fighting fruits", - "$ref": "#/components/schemas/foo" - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - _, err = reader.readResolvedSchema("#/components/schemas/fruits") - assert.ErrorContains(t, err, "references loop detected. traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo") -} - -func TestReferenceResolutionForMapInObject(t *testing.T) { - specString := ` - { - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "additionalProperties": { - "description": "a single mango", - "$ref": "#/components/schemas/foo" - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "additionalProperties": { - "type": "number", - "description": "a single mango" - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReferenceResolutionForArrayInObject(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "$ref": "#/components/schemas/mango" - } - } - }, - "mango": { - "type": "object", - "items": { - "description": "a single mango", - "$ref": "#/components/schemas/foo" - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "description": "fruits that are cool", - "properties": { - "guava": { - "type": "string", - "description": "a guava for my schema" - }, - "mangos": { - "type": "object", - "description": "multiple mangos", - "items": { - "type": "number", - "description": "a single mango" - } - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} - -func TestReferenceResolutionDoesNotOverwriteDescriptions(t *testing.T) { - specString := `{ - "components": { - "schemas": { - "foo": { - "type": "number" - }, - "fruits": { - "type": "object", - "properties": { - "guava": { - "type": "object", - "description": "Guava is a fruit", - "$ref": "#/components/schemas/foo" - }, - "mango": { - "type": "object", - "description": "What is a mango?", - "$ref": "#/components/schemas/foo" - } - } - } - } - } - }` - spec := &Specification{} - reader := &OpenapiReader{ - OpenapiSpec: spec, - memo: make(map[string]jsonschema.Schema), - } - err := json.Unmarshal([]byte(specString), spec) - require.NoError(t, err) - - fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits") - require.NoError(t, err) - - fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ") - require.NoError(t, err) - - expected := `{ - "type": "object", - "properties": { - "guava": { - "type": "number", - "description": "Guava is a fruit" - }, - "mango": { - "type": "number", - "description": "What is a mango?" - } - } - }` - - t.Log("[DEBUG] actual: ", string(fruitsSchemaJson)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(fruitsSchemaJson)) -} diff --git a/bundle/schema/schema.go b/bundle/schema/schema.go deleted file mode 100644 index ac0b4f2ec..000000000 --- a/bundle/schema/schema.go +++ /dev/null @@ -1,287 +0,0 @@ -package schema - -import ( - "container/list" - "fmt" - "reflect" - "strings" - - "github.com/databricks/cli/libs/dyn/dynvar" - "github.com/databricks/cli/libs/jsonschema" -) - -// Fields tagged "readonly" should not be emitted in the schema as they are -// computed at runtime, and should not be assigned a value by the bundle author. -const readonlyTag = "readonly" - -// Annotation for internal bundle fields that should not be exposed to customers. -// Fields can be tagged as "internal" to remove them from the generated schema. -const internalTag = "internal" - -// Annotation for bundle fields that have been deprecated. -// Fields tagged as "deprecated" are removed/omitted from the generated schema. -const deprecatedTag = "deprecated" - -// This function translates golang types into json schema. Here is the mapping -// between json schema types and golang types -// -// - GolangType -> Javascript type / Json Schema2 -// -// - bool -> boolean -// -// - string -> string -// -// - int (all variants) -> number -// -// - float (all variants) -> number -// -// - map[string]MyStruct -> { type: object, additionalProperties: {}} -// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#additional-properties -// -// - []MyStruct -> {type: array, items: {}} -// for details visit: https://json-schema.org/understanding-json-schema/reference/array.html#items -// -// - []MyStruct -> {type: object, properties: {}, additionalProperties: false} -// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#properties -func New(golangType reflect.Type, docs *Docs) (*jsonschema.Schema, error) { - tracker := newTracker() - schema, err := safeToSchema(golangType, docs, "", tracker) - if err != nil { - return nil, tracker.errWithTrace(err.Error(), "root") - } - return schema, nil -} - -func jsonSchemaType(golangType reflect.Type) (jsonschema.Type, error) { - switch golangType.Kind() { - case reflect.Bool: - return jsonschema.BooleanType, nil - case reflect.String: - return jsonschema.StringType, nil - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Float32, reflect.Float64: - - return jsonschema.NumberType, nil - case reflect.Struct: - return jsonschema.ObjectType, nil - case reflect.Map: - if golangType.Key().Kind() != reflect.String { - return jsonschema.InvalidType, fmt.Errorf("only strings map keys are valid. key type: %v", golangType.Key().Kind()) - } - return jsonschema.ObjectType, nil - case reflect.Array, reflect.Slice: - return jsonschema.ArrayType, nil - default: - return jsonschema.InvalidType, fmt.Errorf("unhandled golang type: %s", golangType) - } -} - -// A wrapper over toSchema function to: -// 1. Detect cycles in the bundle config struct. -// 2. Update tracker -// -// params: -// -// - golangType: Golang type to generate json schema for -// -// - docs: Contains documentation to be injected into the generated json schema -// -// - traceId: An identifier for the current type, to trace recursive traversal. -// Its value is the first json tag in case of struct fields and "" in other cases -// like array, map or no json tags -// -// - tracker: Keeps track of types / traceIds seen during recursive traversal -func safeToSchema(golangType reflect.Type, docs *Docs, traceId string, tracker *tracker) (*jsonschema.Schema, error) { - // HACK to unblock CLI release (13th Feb 2024). This is temporary until proper - // support for recursive types is added to the schema generator. PR: https://github.com/databricks/cli/pull/1204 - if traceId == "for_each_task" { - return &jsonschema.Schema{ - Type: jsonschema.ObjectType, - }, nil - } - - // WE ERROR OUT IF THERE ARE CYCLES IN THE JSON SCHEMA - // There are mechanisms to deal with cycles though recursive identifiers in json - // schema. However if we use them, we would need to make sure we are able to detect - // cycles where two properties (directly or indirectly) pointing to each other - // - // see: https://json-schema.org/understanding-json-schema/structuring.html#recursion - // for details - if tracker.hasCycle(golangType) { - return nil, fmt.Errorf("cycle detected") - } - - tracker.push(golangType, traceId) - props, err := toSchema(golangType, docs, tracker) - if err != nil { - return nil, err - } - tracker.pop(golangType) - return props, nil -} - -// This function returns all member fields of the provided type. -// If the type has embedded (aka anonymous) fields, this function traverses -// those in a breadth first manner -func getStructFields(golangType reflect.Type) []reflect.StructField { - fields := []reflect.StructField{} - bfsQueue := list.New() - - for i := 0; i < golangType.NumField(); i++ { - bfsQueue.PushBack(golangType.Field(i)) - } - for bfsQueue.Len() > 0 { - front := bfsQueue.Front() - field := front.Value.(reflect.StructField) - bfsQueue.Remove(front) - - if !field.Anonymous { - fields = append(fields, field) - continue - } - - fieldType := field.Type - if fieldType.Kind() == reflect.Pointer { - fieldType = fieldType.Elem() - } - - for i := 0; i < fieldType.NumField(); i++ { - bfsQueue.PushBack(fieldType.Field(i)) - } - } - return fields -} - -func toSchema(golangType reflect.Type, docs *Docs, tracker *tracker) (*jsonschema.Schema, error) { - // *Struct and Struct generate identical json schemas - if golangType.Kind() == reflect.Pointer { - return safeToSchema(golangType.Elem(), docs, "", tracker) - } - if golangType.Kind() == reflect.Interface { - return &jsonschema.Schema{}, nil - } - - rootJavascriptType, err := jsonSchemaType(golangType) - if err != nil { - return nil, err - } - jsonSchema := &jsonschema.Schema{Type: rootJavascriptType} - - // If the type is a non-string primitive, then we allow it to be a string - // provided it's a pure variable reference (ie only a single variable reference). - if rootJavascriptType == jsonschema.BooleanType || rootJavascriptType == jsonschema.NumberType { - jsonSchema = &jsonschema.Schema{ - AnyOf: []*jsonschema.Schema{ - { - Type: rootJavascriptType, - }, - { - Type: jsonschema.StringType, - Pattern: dynvar.VariableRegex, - }, - }, - } - } - - if docs != nil { - jsonSchema.Description = docs.Description - } - - // case array/slice - if golangType.Kind() == reflect.Array || golangType.Kind() == reflect.Slice { - elemGolangType := golangType.Elem() - elemJavascriptType, err := jsonSchemaType(elemGolangType) - if err != nil { - return nil, err - } - var childDocs *Docs - if docs != nil { - childDocs = docs.Items - } - elemProps, err := safeToSchema(elemGolangType, childDocs, "", tracker) - if err != nil { - return nil, err - } - jsonSchema.Items = &jsonschema.Schema{ - Type: elemJavascriptType, - Properties: elemProps.Properties, - AdditionalProperties: elemProps.AdditionalProperties, - Items: elemProps.Items, - Required: elemProps.Required, - } - } - - // case map - if golangType.Kind() == reflect.Map { - if golangType.Key().Kind() != reflect.String { - return nil, fmt.Errorf("only string keyed maps allowed") - } - var childDocs *Docs - if docs != nil { - childDocs = docs.AdditionalProperties - } - jsonSchema.AdditionalProperties, err = safeToSchema(golangType.Elem(), childDocs, "", tracker) - if err != nil { - return nil, err - } - } - - // case struct - if golangType.Kind() == reflect.Struct { - children := getStructFields(golangType) - properties := map[string]*jsonschema.Schema{} - required := []string{} - for _, child := range children { - bundleTag := child.Tag.Get("bundle") - // Fields marked as "readonly", "internal" or "deprecated" are skipped - // while generating the schema - if bundleTag == readonlyTag || bundleTag == internalTag || bundleTag == deprecatedTag { - continue - } - - // get child json tags - childJsonTag := strings.Split(child.Tag.Get("json"), ",") - childName := childJsonTag[0] - - // skip children that have no json tags, the first json tag is "" - // or the first json tag is "-" - if childName == "" || childName == "-" { - continue - } - - // get docs for the child if they exist - var childDocs *Docs - if docs != nil { - if val, ok := docs.Properties[childName]; ok { - childDocs = val - } - } - - // compute if the child is a required field. Determined by the - // presence of "omitempty" in the json tags - hasOmitEmptyTag := false - for i := 1; i < len(childJsonTag); i++ { - if childJsonTag[i] == "omitempty" { - hasOmitEmptyTag = true - } - } - if !hasOmitEmptyTag { - required = append(required, childName) - } - - // compute Schema.Properties for the child recursively - fieldProps, err := safeToSchema(child.Type, childDocs, childName, tracker) - if err != nil { - return nil, err - } - properties[childName] = fieldProps - } - - jsonSchema.AdditionalProperties = false - jsonSchema.Properties = properties - jsonSchema.Required = required - } - - return jsonSchema, nil -} diff --git a/bundle/schema/schema_test.go b/bundle/schema/schema_test.go deleted file mode 100644 index 6d9df0cc7..000000000 --- a/bundle/schema/schema_test.go +++ /dev/null @@ -1,1900 +0,0 @@ -package schema - -import ( - "encoding/json" - "reflect" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestIntSchema(t *testing.T) { - var elemInt int - - expected := - `{ - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }` - - schema, err := New(reflect.TypeOf(elemInt), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBooleanSchema(t *testing.T) { - var elem bool - - expected := - `{ - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }` - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStringSchema(t *testing.T) { - var elem string - - expected := - `{ - "type": "string" - }` - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfPrimitivesSchema(t *testing.T) { - type Foo struct { - IntVal int `json:"int_val"` - Int8Val int8 `json:"int8_val"` - Int16Val int16 `json:"int16_val"` - Int32Val int32 `json:"int32_val"` - Int64Val int64 `json:"int64_val"` - - UIntVal uint `json:"uint_val"` - Uint8Val uint8 `json:"uint8_val"` - Uint16Val uint16 `json:"uint16_val"` - Uint32Val uint32 `json:"uint32_val"` - Uint64Val uint64 `json:"uint64_val"` - - Float32Val float32 `json:"float32_val"` - Float64Val float64 `json:"float64_val"` - - StringVal string `json:"string_val"` - - BoolVal bool `json:"bool_val"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bool_val": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "float32_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "float64_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int16_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int32_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int64_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int8_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "int_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "string_val": { - "type": "string" - }, - "uint16_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint32_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint64_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint8_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "uint_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "int_val", - "int8_val", - "int16_val", - "int32_val", - "int64_val", - "uint_val", - "uint8_val", - "uint16_val", - "uint32_val", - "uint64_val", - "float32_val", - "float64_val", - "string_val", - "bool_val" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfStructsSchema(t *testing.T) { - type Bar struct { - A int `json:"a"` - B string `json:"b,string"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - type MyStruct struct { - Foo Foo `json:"foo"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "foo": { - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "a": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "b": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "a", - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - } - }, - "additionalProperties": false, - "required": [ - "foo" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfMapsSchema(t *testing.T) { - type Bar struct { - MyMap map[string]int `json:"my_map"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "my_map": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_map" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestStructOfSliceSchema(t *testing.T) { - type Bar struct { - MySlice []string `json:"my_slice"` - } - - type Foo struct { - Bar Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "my_slice": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "my_slice" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfPrimitivesSchema(t *testing.T) { - var elem map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfStructSchema(t *testing.T) { - type Foo struct { - MyInt int `json:"my_int"` - } - - var elem map[string]Foo - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "my_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_int" - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfMapSchema(t *testing.T) { - var elem map[string]map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestMapOfSliceSchema(t *testing.T) { - var elem map[string][]string - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "additionalProperties": { - "type": "array", - "items": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfPrimitivesSchema(t *testing.T) { - var elem []float32 - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "number" - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfSliceSchema(t *testing.T) { - var elem [][]string - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "array", - "items": { - "type": "string" - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfMapSchema(t *testing.T) { - var elem []map[string]int - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestSliceOfStructSchema(t *testing.T) { - type Foo struct { - MyInt int `json:"my_int"` - } - - var elem []Foo - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "array", - "items": { - "type": "object", - "properties": { - "my_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_int" - ] - } - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestEmbeddedStructSchema(t *testing.T) { - type Location struct { - Country string `json:"country"` - State string `json:"state,omitempty"` - } - - type Person struct { - Name string `json:"name"` - Age int `json:"age,omitempty"` - Home Location `json:"home"` - } - - type Plot struct { - Events map[string]Person `json:"events"` - } - - type Story struct { - Plot Plot `json:"plot"` - *Person - Location - } - - elem := Story{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "country": { - "type": "string" - }, - "home": { - "type": "object", - "properties": { - "country": { - "type": "string" - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "country" - ] - }, - "name": { - "type": "string" - }, - "plot": { - "type": "object", - "properties": { - "events": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "home": { - "type": "object", - "properties": { - "country": { - "type": "string" - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "country" - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name", - "home" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "events" - ] - }, - "state": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "plot", - "name", - "home", - "country" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestErrorWithTrace(t *testing.T) { - tracker := newTracker() - dummyType := reflect.TypeOf(struct{}{}) - err := tracker.errWithTrace("with empty trace", "root") - assert.ErrorContains(t, err, "with empty trace. traversal trace: root") - - tracker.push(dummyType, "resources") - err = tracker.errWithTrace("with depth = 1", "root") - assert.ErrorContains(t, err, "with depth = 1. traversal trace: root -> resources") - - tracker.push(dummyType, "pipelines") - tracker.push(dummyType, "datasets") - err = tracker.errWithTrace("with depth = 4", "root") - assert.ErrorContains(t, err, "with depth = 4. traversal trace: root -> resources -> pipelines -> datasets") -} - -func TestNonAnnotatedFieldsAreSkipped(t *testing.T) { - type MyStruct struct { - Foo string - Bar int `json:"bar"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDashFieldsAreSkipped(t *testing.T) { - type MyStruct struct { - Foo string `json:"-"` - Bar int `json:"bar"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestPointerInStructSchema(t *testing.T) { - - type Bar struct { - PtrVal2 *int `json:"ptr_val2"` - } - - type Foo struct { - PtrInt *int `json:"ptr_int"` - PtrString *string `json:"ptr_string"` - FloatVal float32 `json:"float_val"` - PtrBar *Bar `json:"ptr_bar"` - Bar *Bar `json:"bar"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "bar": { - "type": "object", - "properties": { - "ptr_val2": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "ptr_val2" - ] - }, - "float_val": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "ptr_bar": { - "type": "object", - "properties": { - "ptr_val2": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "ptr_val2" - ] - }, - "ptr_int": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "ptr_string": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "ptr_int", - "ptr_string", - "float_val", - "ptr_bar", - "bar" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestGenericSchema(t *testing.T) { - type Person struct { - Name string `json:"name"` - Age int `json:"age,omitempty"` - } - - type Plot struct { - Stakes []string `json:"stakes"` - Deaths []Person `json:"deaths"` - Murders map[string]Person `json:"murders"` - } - - type Wedding struct { - Hidden string `json:","` - Groom Person `json:"groom"` - Bride Person `json:"bride"` - Plots []Plot `json:"plots"` - } - - type Story struct { - Hero *Person `json:"hero"` - Villian Person `json:"villian,omitempty"` - Weddings []Wedding `json:"weddings"` - } - - elem := Story{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "hero": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "villian": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "weddings": { - "type": "array", - "items": { - "type": "object", - "properties": { - "bride": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "groom": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - }, - "plots": { - "type": "array", - "items": { - "type": "object", - "properties": { - "deaths": { - "type": "array", - "items": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - } - }, - "murders": { - "type": "object", - "additionalProperties": { - "type": "object", - "properties": { - "age": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "name" - ] - } - }, - "stakes": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "stakes", - "deaths", - "murders" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "groom", - "bride", - "plots" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "hero", - "weddings" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestFieldsWithoutOmitEmptyAreRequired(t *testing.T) { - - type Papaya struct { - A int `json:"a,string,omitempty"` - B string `json:"b"` - } - - type MyStruct struct { - Foo string `json:"-,omitempty"` - Bar int `json:"bar"` - Apple int `json:"apple,omitempty"` - Mango int `json:",omitempty"` - Guava int `json:","` - Papaya *Papaya `json:"papaya,"` - } - - elem := MyStruct{} - - schema, err := New(reflect.TypeOf(elem), nil) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "bar": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "papaya": { - "type": "object", - "properties": { - "a": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "b": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "bar", - "papaya" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForObject(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_struct": { - Description: "docs for my struct", - Properties: map[string]*Docs{ - "a": { - Description: "docs for a", - }, - "c": { - Description: "docs for c which does not exist on my_struct", - }, - }, - }, - }, - } - - type MyStruct struct { - A string `json:"a"` - B int `json:"b"` - } - - type Root struct { - MyStruct *MyStruct `json:"my_struct"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_struct": { - "type": "object", - "description": "docs for my struct", - "properties": { - "a": { - "type": "string", - "description": "docs for a" - }, - "b": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "a", - "b" - ] - } - }, - "additionalProperties": false, - "required": [ - "my_struct" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForSlice(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_slice": { - Description: "docs for my slice", - Items: &Docs{ - Properties: map[string]*Docs{ - "guava": { - Description: "docs for guava", - }, - "pineapple": { - Description: "docs for pineapple", - }, - "watermelon": { - Description: "docs for watermelon which does not exist in schema", - }, - }, - }, - }, - }, - } - - type Bar struct { - Guava int `json:"guava"` - Pineapple int `json:"pineapple"` - } - - type Root struct { - MySlice []Bar `json:"my_slice"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_slice": { - "type": "array", - "description": "docs for my slice", - "items": { - "type": "object", - "properties": { - "guava": { - "description": "docs for guava", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "pineapple": { - "description": "docs for pineapple", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "guava", - "pineapple" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_slice" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForMap(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_map": { - Description: "docs for my map", - AdditionalProperties: &Docs{ - Properties: map[string]*Docs{ - "apple": { - Description: "docs for apple", - }, - "mango": { - Description: "docs for mango", - }, - "watermelon": { - Description: "docs for watermelon which does not exist in schema", - }, - "papaya": { - Description: "docs for papaya which does not exist in schema", - }, - }, - }, - }, - }, - } - - type Foo struct { - Apple int `json:"apple"` - Mango int `json:"mango"` - } - - type Root struct { - MyMap map[string]*Foo `json:"my_map"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_map": { - "type": "object", - "description": "docs for my map", - "additionalProperties": { - "type": "object", - "properties": { - "apple": { - "description": "docs for apple", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "mango": { - "description": "docs for mango", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "apple", - "mango" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "my_map" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestDocIngestionForTopLevelPrimitive(t *testing.T) { - docs := &Docs{ - Description: "docs for root", - Properties: map[string]*Docs{ - "my_val": { - Description: "docs for my val", - }, - }, - } - - type Root struct { - MyVal int `json:"my_val"` - } - - elem := Root{} - - schema, err := New(reflect.TypeOf(elem), docs) - require.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expectedSchema := - `{ - "type": "object", - "description": "docs for root", - "properties": { - "my_val": { - "description": "docs for my val", - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "my_val" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expectedSchema) - - assert.Equal(t, expectedSchema, string(jsonSchema)) -} - -func TestErrorOnMapWithoutStringKey(t *testing.T) { - type Foo struct { - Bar map[int]string `json:"bar"` - } - elem := Foo{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "only strings map keys are valid. key type: int") -} - -func TestErrorIfStructRefersToItself(t *testing.T) { - type Foo struct { - MyFoo *Foo `json:"my_foo"` - } - - elem := Foo{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "cycle detected. traversal trace: root -> my_foo") -} - -func TestErrorIfStructHasLoop(t *testing.T) { - type Apple struct { - MyVal int `json:"my_val"` - MyMango struct { - MyGuava struct { - MyPapaya struct { - MyApple *Apple `json:"my_apple"` - } `json:"my_papaya"` - } `json:"my_guava"` - } `json:"my_mango"` - } - - elem := Apple{} - _, err := New(reflect.TypeOf(elem), nil) - assert.ErrorContains(t, err, "cycle detected. traversal trace: root -> my_mango -> my_guava -> my_papaya -> my_apple") -} - -func TestInterfaceGeneratesEmptySchema(t *testing.T) { - type Foo struct { - Apple int `json:"apple"` - Mango interface{} `json:"mango"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "mango": {} - }, - "additionalProperties": false, - "required": [ - "apple", - "mango" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBundleReadOnlytag(t *testing.T) { - type Pokemon struct { - Pikachu string `json:"pikachu" bundle:"readonly"` - Raichu string `json:"raichu"` - } - - type Foo struct { - Pokemon *Pokemon `json:"pokemon"` - Apple int `json:"apple"` - Mango string `json:"mango" bundle:"readonly"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "pokemon": { - "type": "object", - "properties": { - "raichu": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "raichu" - ] - } - }, - "additionalProperties": false, - "required": [ - "pokemon", - "apple" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} - -func TestBundleInternalTag(t *testing.T) { - type Pokemon struct { - Pikachu string `json:"pikachu" bundle:"internal"` - Raichu string `json:"raichu"` - } - - type Foo struct { - Pokemon *Pokemon `json:"pokemon"` - Apple int `json:"apple"` - Mango string `json:"mango" bundle:"internal"` - } - - elem := Foo{} - - schema, err := New(reflect.TypeOf(elem), nil) - assert.NoError(t, err) - - jsonSchema, err := json.MarshalIndent(schema, " ", " ") - assert.NoError(t, err) - - expected := - `{ - "type": "object", - "properties": { - "apple": { - "anyOf": [ - { - "type": "number" - }, - { - "type": "string", - "pattern": "\\$\\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)*(\\[[0-9]+\\])*)\\}" - } - ] - }, - "pokemon": { - "type": "object", - "properties": { - "raichu": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "raichu" - ] - } - }, - "additionalProperties": false, - "required": [ - "pokemon", - "apple" - ] - }` - - t.Log("[DEBUG] actual: ", string(jsonSchema)) - t.Log("[DEBUG] expected: ", expected) - assert.Equal(t, expected, string(jsonSchema)) -} diff --git a/bundle/schema/spec.go b/bundle/schema/spec.go deleted file mode 100644 index fdc31a4ca..000000000 --- a/bundle/schema/spec.go +++ /dev/null @@ -1,11 +0,0 @@ -package schema - -import "github.com/databricks/cli/libs/jsonschema" - -type Specification struct { - Components *Components `json:"components"` -} - -type Components struct { - Schemas map[string]*jsonschema.Schema `json:"schemas,omitempty"` -} diff --git a/bundle/schema/tracker.go b/bundle/schema/tracker.go deleted file mode 100644 index ace6559bc..000000000 --- a/bundle/schema/tracker.go +++ /dev/null @@ -1,53 +0,0 @@ -package schema - -import ( - "container/list" - "fmt" -) - -type tracker struct { - // Nodes encountered in current path during the recursive traversal. Used to - // check for cycles - seenNodes map[interface{}]struct{} - - // List of node names encountered in order in current path during the recursive traversal. - // Used to hydrate errors with path to the exact node where error occured. - // - // NOTE: node and node names can be the same - listOfNodes *list.List -} - -func newTracker() *tracker { - return &tracker{ - seenNodes: map[interface{}]struct{}{}, - listOfNodes: list.New(), - } -} - -func (t *tracker) errWithTrace(prefix string, initTrace string) error { - traceString := initTrace - curr := t.listOfNodes.Front() - for curr != nil { - if curr.Value.(string) != "" { - traceString += " -> " + curr.Value.(string) - } - curr = curr.Next() - } - return fmt.Errorf(prefix + ". traversal trace: " + traceString) -} - -func (t *tracker) hasCycle(node interface{}) bool { - _, ok := t.seenNodes[node] - return ok -} - -func (t *tracker) push(node interface{}, name string) { - t.seenNodes[node] = struct{}{} - t.listOfNodes.PushBack(name) -} - -func (t *tracker) pop(nodeType interface{}) { - back := t.listOfNodes.Back() - t.listOfNodes.Remove(back) - delete(t.seenNodes, nodeType) -} diff --git a/bundle/tests/clusters/databricks.yml b/bundle/tests/clusters/databricks.yml new file mode 100644 index 000000000..1074462a6 --- /dev/null +++ b/bundle/tests/clusters/databricks.yml @@ -0,0 +1,36 @@ +bundle: + name: clusters + +workspace: + host: https://acme.cloud.databricks.com/ + +resources: + clusters: + foo: + cluster_name: foo + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + +targets: + default: + + development: + resources: + clusters: + foo: + cluster_name: foo-override + num_workers: 3 + node_type_id: "m5.xlarge" + autoscale: + min_workers: 1 + max_workers: 3 + spark_version: "15.2.x-scala2.12" + spark_conf: + "spark.executor.memory": "4g" + "spark.executor.memory2": "4g" diff --git a/bundle/tests/clusters_test.go b/bundle/tests/clusters_test.go new file mode 100644 index 000000000..def8a2a31 --- /dev/null +++ b/bundle/tests/clusters_test.go @@ -0,0 +1,36 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestClusters(t *testing.T) { + b := load(t, "./clusters") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo", cluster.ClusterName) + assert.Equal(t, "13.3.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "i3.xlarge", cluster.NodeTypeId) + assert.Equal(t, 2, cluster.NumWorkers) + assert.Equal(t, "2g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, 2, cluster.Autoscale.MinWorkers) + assert.Equal(t, 7, cluster.Autoscale.MaxWorkers) +} + +func TestClustersOverride(t *testing.T) { + b := loadTarget(t, "./clusters", "development") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo-override", cluster.ClusterName) + assert.Equal(t, "15.2.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "m5.xlarge", cluster.NodeTypeId) + assert.Equal(t, 3, cluster.NumWorkers) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory2"]) + assert.Equal(t, 1, cluster.Autoscale.MinWorkers) + assert.Equal(t, 3, cluster.Autoscale.MaxWorkers) +} diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index 6371071ce..7a9a53a76 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -88,3 +88,21 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) } } + +func TestComplexVariablesOverrideWithFullSyntax(t *testing.T) { + b, diags := loadTargetWithDiags("variables/complex", "dev") + require.Empty(t, diags) + + diags = bundle.Apply(context.Background(), b, bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferencesInComplexVariables(), + mutator.ResolveVariableReferences( + "variables", + ), + )) + require.NoError(t, diags.Error()) + require.Empty(t, diags) + + complexvar := b.Config.Variables["complexvar"].Value + require.Equal(t, map[string]interface{}{"key1": "1", "key2": "2", "key3": "3"}, complexvar) +} diff --git a/bundle/tests/variables/complex/databricks.yml b/bundle/tests/variables/complex/databricks.yml index ca27f606d..3b32a7c8e 100644 --- a/bundle/tests/variables/complex/databricks.yml +++ b/bundle/tests/variables/complex/databricks.yml @@ -35,6 +35,13 @@ variables: - jar: "/path/to/jar" - egg: "/path/to/egg" - whl: "/path/to/whl" + complexvar: + type: complex + description: "A complex variable" + default: + key1: "value1" + key2: "value2" + key3: "value3" targets: @@ -49,3 +56,9 @@ targets: spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false + complexvar: + type: complex + default: + key1: "1" + key2: "2" + key3: "3" diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 1166875ab..f1c85cb3d 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" ) @@ -23,13 +24,19 @@ func newDeployCommand() *cobra.Command { var force bool var forceLock bool var failOnActiveRuns bool - var computeID string + var clusterId string var autoApprove bool + var verbose bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") - cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") + cmd.Flags().StringVar(&clusterId, "compute-id", "", "Override cluster in the deployment with the given compute ID.") + cmd.Flags().StringVarP(&clusterId, "cluster-id", "c", "", "Override cluster in the deployment with the given cluster ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") + cmd.Flags().MarkDeprecated("compute-id", "use --cluster-id instead") + cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") + // Verbose flag currently only affects file sync output, it's used by the vscode extension + cmd.Flags().MarkHidden("verbose") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -42,7 +49,10 @@ func newDeployCommand() *cobra.Command { b.AutoApprove = autoApprove if cmd.Flag("compute-id").Changed { - b.Config.Bundle.ComputeID = computeID + b.Config.Bundle.ClusterId = clusterId + } + if cmd.Flag("cluster-id").Changed { + b.Config.Bundle.ClusterId = clusterId } if cmd.Flag("fail-on-active-runs").Changed { b.Config.Bundle.Deployment.FailOnActiveRuns = failOnActiveRuns @@ -51,11 +61,18 @@ func newDeployCommand() *cobra.Command { return nil }) + var outputHandler sync.OutputHandler + if verbose { + outputHandler = func(ctx context.Context, c <-chan sync.Event) { + sync.TextOutput(ctx, c, cmd.OutOrStdout()) + } + } + diags = diags.Extend( bundle.Apply(ctx, b, bundle.Seq( phases.Initialize(), phases.Build(), - phases.Deploy(), + phases.Deploy(outputHandler), )), ) } diff --git a/cmd/bundle/generate/generate_test.go b/cmd/bundle/generate/generate_test.go index ae3710ac8..7de6805fb 100644 --- a/cmd/bundle/generate/generate_test.go +++ b/cmd/bundle/generate/generate_test.go @@ -152,6 +152,12 @@ func TestGenerateJobCommand(t *testing.T) { }, }, }, + Parameters: []jobs.JobParameterDefinition{ + { + Name: "empty", + Default: "", + }, + }, }, }, nil) @@ -198,6 +204,9 @@ func TestGenerateJobCommand(t *testing.T) { - task_key: notebook_task notebook_task: notebook_path: %s + parameters: + - name: empty + default: "" `, filepath.Join("..", "src", "notebook.py")), string(data)) data, err = os.ReadFile(filepath.Join(srcDir, "notebook.py")) diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 63458f85c..9ef5eb8ff 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -55,13 +55,7 @@ task or a Python wheel task, the second example applies. return diags.Error() } - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - terraform.Interpolate(), - terraform.Write(), - terraform.StatePull(), - terraform.Load(terraform.ErrorOnEmptyState), - )) + diags = bundle.Apply(ctx, b, phases.Initialize()) if err := diags.Error(); err != nil { return err } @@ -84,6 +78,16 @@ task or a Python wheel task, the second example applies. return fmt.Errorf("expected a KEY of the resource to run") } + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.Interpolate(), + terraform.Write(), + terraform.StatePull(), + terraform.Load(terraform.ErrorOnEmptyState), + )) + if err := diags.Error(); err != nil { + return err + } + runner, err := run.Find(b, args[0]) if err != nil { return err diff --git a/cmd/bundle/schema.go b/cmd/bundle/schema.go index 813aebbae..480618ed7 100644 --- a/cmd/bundle/schema.go +++ b/cmd/bundle/schema.go @@ -1,13 +1,8 @@ package bundle import ( - "encoding/json" - "reflect" - - "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/schema" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/libs/jsonschema" "github.com/spf13/cobra" ) @@ -19,32 +14,8 @@ func newSchemaCommand() *cobra.Command { } cmd.RunE = func(cmd *cobra.Command, args []string) error { - // Load embedded schema descriptions. - docs, err := schema.LoadBundleDescriptions() - if err != nil { - return err - } - - // Generate the JSON schema from the bundle configuration struct in Go. - schema, err := schema.New(reflect.TypeOf(config.Root{}), docs) - if err != nil { - return err - } - - // Target variable value overrides can be primitives, maps or sequences. - // Set an empty schema for them. - err = schema.SetByPath("targets.*.variables.*", jsonschema.Schema{}) - if err != nil { - return err - } - - // Print the JSON schema to stdout. - result, err := json.MarshalIndent(schema, "", " ") - if err != nil { - return err - } - cmd.OutOrStdout().Write(result) - return nil + _, err := cmd.OutOrStdout().Write(schema.Bytes) + return err } return cmd diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index 23a4c018f..2092d9e33 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "path/filepath" - stdsync "sync" "time" "github.com/databricks/cli/bundle" @@ -46,6 +45,21 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn return nil, flag.ErrHelp } + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + + var outputHandler sync.OutputHandler + if outputFunc != nil { + outputHandler = func(ctx context.Context, events <-chan sync.Event) { + outputFunc(ctx, events, cmd.OutOrStdout()) + } + } + opts := sync.SyncOptions{ LocalRoot: vfs.MustNew(args[0]), Paths: []string{"."}, @@ -62,6 +76,8 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn // exist and add it to the `.gitignore` file in the root. SnapshotBasePath: filepath.Join(args[0], ".databricks"), WorkspaceClient: root.WorkspaceClient(cmd.Context()), + + OutputHandler: outputHandler, } return &opts, nil } @@ -118,23 +134,7 @@ func New() *cobra.Command { if err != nil { return err } - - var outputFunc func(context.Context, <-chan sync.Event, io.Writer) - switch f.output { - case flags.OutputText: - outputFunc = textOutput - case flags.OutputJSON: - outputFunc = jsonOutput - } - - var wg stdsync.WaitGroup - if outputFunc != nil { - wg.Add(1) - go func() { - defer wg.Done() - outputFunc(ctx, s.Events(), cmd.OutOrStdout()) - }() - } + defer s.Close() if f.watch { err = s.RunContinuous(ctx) @@ -142,8 +142,6 @@ func New() *cobra.Command { _, err = s.RunOnce(ctx) } - s.Close() - wg.Wait() return err } diff --git a/go.mod b/go.mod index db874b489..b6478a915 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/databricks/cli -go 1.22 +go 1.22.0 + +toolchain go1.22.7 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT diff --git a/internal/bundle/artifacts_test.go b/internal/bundle/artifacts_test.go index bae8073fc..fa052e223 100644 --- a/internal/bundle/artifacts_test.go +++ b/internal/bundle/artifacts_test.go @@ -36,7 +36,8 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) { wsDir := internal.TemporaryWorkspaceDir(t, w) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", @@ -100,7 +101,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T) wsDir := internal.TemporaryWorkspaceDir(t, w) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", @@ -169,7 +171,8 @@ func TestAccUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) { touchEmptyFile(t, whlPath) b := &bundle.Bundle{ - RootPath: dir, + RootPath: dir, + SyncRootPath: dir, Config: config.Root{ Bundle: config.Bundle{ Target: "whatever", diff --git a/internal/bundle/bundles/clusters/databricks_template_schema.json b/internal/bundle/bundles/clusters/databricks_template_schema.json new file mode 100644 index 000000000..c1c5cf12e --- /dev/null +++ b/internal/bundle/bundles/clusters/databricks_template_schema.json @@ -0,0 +1,16 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + } + } +} diff --git a/internal/bundle/bundles/clusters/template/databricks.yml.tmpl b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl new file mode 100644 index 000000000..e0d6320a3 --- /dev/null +++ b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl @@ -0,0 +1,24 @@ +bundle: + name: basic + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + clusters: + test_cluster: + cluster_name: "test-cluster-{{.unique_id}}" + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + num_workers: 2 + spark_conf: + "spark.executor.memory": "2g" + + jobs: + foo: + name: test-job-with-cluster-{{.unique_id}} + tasks: + - task_key: my_notebook_task + existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}" + spark_python_task: + python_file: ./hello_world.py diff --git a/internal/bundle/bundles/clusters/template/hello_world.py b/internal/bundle/bundles/clusters/template/hello_world.py new file mode 100644 index 000000000..f301245e2 --- /dev/null +++ b/internal/bundle/bundles/clusters/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/internal/bundle/clusters_test.go b/internal/bundle/clusters_test.go new file mode 100644 index 000000000..a961f3ea8 --- /dev/null +++ b/internal/bundle/clusters_test.go @@ -0,0 +1,56 @@ +package bundle + +import ( + "fmt" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestAccDeployBundleWithCluster(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + if testutil.IsAWSCloud(wt.T) { + t.Skip("Skipping test for AWS cloud because it is not permitted to create clusters") + } + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + root, err := initTestTemplate(t, ctx, "clusters", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + t.Cleanup(func() { + err = destroyBundle(t, ctx, root) + require.NoError(t, err) + + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + if err != nil { + require.ErrorContains(t, err, "does not exist") + } else { + require.Contains(t, []compute.State{compute.StateTerminated, compute.StateTerminating}, cluster.State) + } + + }) + + err = deployBundle(t, ctx, root) + require.NoError(t, err) + + // Cluster should exists after bundle deployment + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + require.NoError(t, err) + require.NotNil(t, cluster) + + out, err := runResource(t, ctx, root, "foo") + require.NoError(t, err) + require.Contains(t, out, "Hello World!") +} diff --git a/internal/init_test.go b/internal/init_test.go index c3cb0127e..d1a89f7b7 100644 --- a/internal/init_test.go +++ b/internal/init_test.go @@ -126,7 +126,7 @@ func TestAccBundleInitHelpers(t *testing.T) { }{ { funcName: "{{short_name}}", - expected: auth.GetShortUserName(me.UserName), + expected: auth.GetShortUserName(me), }, { funcName: "{{user_name}}", diff --git a/internal/testutil/cloud.go b/internal/testutil/cloud.go index e547069f3..ba5b75ecf 100644 --- a/internal/testutil/cloud.go +++ b/internal/testutil/cloud.go @@ -49,3 +49,7 @@ func GetCloud(t *testing.T) Cloud { } return -1 } + +func IsAWSCloud(t *testing.T) bool { + return GetCloud(t) == AWS +} diff --git a/libs/auth/user.go b/libs/auth/user.go index 8eaa87633..c6aa974f3 100644 --- a/libs/auth/user.go +++ b/libs/auth/user.go @@ -4,12 +4,17 @@ import ( "strings" "github.com/databricks/cli/libs/textutil" + "github.com/databricks/databricks-sdk-go/service/iam" ) // Get a short-form username, based on the user's primary email address. // We leave the full range of unicode letters in tact, but remove all "special" characters, // including dots, which are not supported in e.g. experiment names. -func GetShortUserName(emailAddress string) string { - local, _, _ := strings.Cut(emailAddress, "@") +func GetShortUserName(user *iam.User) string { + name := user.UserName + if IsServicePrincipal(user.UserName) && user.DisplayName != "" { + name = user.DisplayName + } + local, _, _ := strings.Cut(name, "@") return textutil.NormalizeString(local) } diff --git a/libs/auth/user_test.go b/libs/auth/user_test.go index 62b2d29ac..24b61464b 100644 --- a/libs/auth/user_test.go +++ b/libs/auth/user_test.go @@ -3,70 +3,111 @@ package auth import ( "testing" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) func TestGetShortUserName(t *testing.T) { tests := []struct { name string - email string + user *iam.User expected string }{ { - email: "test.user.1234@example.com", + user: &iam.User{ + UserName: "test.user.1234@example.com", + }, expected: "test_user_1234", }, { - email: "tést.üser@example.com", + user: &iam.User{ + UserName: "tést.üser@example.com", + }, expected: "tést_üser", }, { - email: "test$.user@example.com", + user: &iam.User{ + UserName: "test$.user@example.com", + }, expected: "test_user", }, { - email: `jöhn.dœ@domain.com`, // Using non-ASCII characters. + user: &iam.User{ + UserName: `jöhn.dœ@domain.com`, // Using non-ASCII characters. + }, expected: "jöhn_dœ", }, { - email: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + user: &iam.User{ + UserName: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + }, expected: "first_tag", }, { - email: `email@sub.domain.com`, // Using a sub-domain. + user: &iam.User{ + UserName: `email@sub.domain.com`, // Using a sub-domain. + }, expected: "email", }, { - email: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. + user: &iam.User{ + UserName: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. + }, expected: "quoted", }, { - email: `name-o'mally@website.org`, // Single quote in the local-part. + user: &iam.User{ + UserName: `name-o'mally@website.org`, // Single quote in the local-part. + }, expected: "name_o_mally", }, { - email: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + user: &iam.User{ + UserName: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + }, expected: "user_domain", }, { - email: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + user: &iam.User{ + UserName: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + }, expected: "long_name_with_dots", }, { - email: `me&you@together.com`, // Using an ampersand (&) in the local-part. + user: &iam.User{ + UserName: `me&you@together.com`, // Using an ampersand (&) in the local-part. + }, expected: "me_you", }, { - email: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + user: &iam.User{ + UserName: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + }, expected: "user_def_xyz", }, { - email: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). + user: &iam.User{ + UserName: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). + }, expected: "admin", }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + DisplayName: "my-service-principal", + }, + expected: "my_service_principal", + }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + // This service princpal has DisplayName (it's an optional property) + }, + expected: "1706906c_c0a2_4c25_9f57_3a7aa3cb8123", + }, } for _, tt := range tests { - assert.Equal(t, tt.expected, GetShortUserName(tt.email)) + assert.Equal(t, tt.expected, GetShortUserName(tt.user)) } } diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index bf160fa85..338ac8ce6 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -6,9 +6,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -const VariableRegex = `\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}` - -var re = regexp.MustCompile(VariableRegex) +var re = regexp.MustCompile(`\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`) // ref represents a variable reference. // It is a string [dyn.Value] contained in a larger [dyn.Value]. diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index f4c7157f2..0fd81d534 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -151,6 +151,8 @@ func isScalarValueInString(v dyn.Value) bool { switch v.MustString() { case "true", "false": return true + case "": + return true default: _, err := parseNumber(v.MustString()) return err == nil diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go index a28038eeb..93d9a03dc 100644 --- a/libs/git/repository_test.go +++ b/libs/git/repository_test.go @@ -209,7 +209,26 @@ func TestRepositoryGitConfigWhenNotARepo(t *testing.T) { } func TestRepositoryOriginUrlRemovesUserCreds(t *testing.T) { - repo := newTestRepository(t) - repo.addOriginUrl("https://username:token@github.com/databricks/foobar.git") - repo.assertOriginUrl("https://github.com/databricks/foobar.git") + tcases := []struct { + url string + expected string + }{ + { + url: "https://username:token@github.com/databricks/foobar.git", + expected: "https://github.com/databricks/foobar.git", + }, + { + // Note: The token is still considered and parsed as a username here. + // However credentials integrations by Git providers like GitHub + // allow for setting a PAT token as a username. + url: "https://token@github.com/databricks/foobar.git", + expected: "https://github.com/databricks/foobar.git", + }, + } + + for _, tc := range tcases { + repo := newTestRepository(t) + repo.addOriginUrl(tc.url) + repo.assertOriginUrl(tc.expected) + } } diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go new file mode 100644 index 000000000..18a2b3ba5 --- /dev/null +++ b/libs/jsonschema/from_type.go @@ -0,0 +1,356 @@ +package jsonschema + +import ( + "container/list" + "fmt" + "maps" + "path" + "reflect" + "slices" + "strings" +) + +var skipTags = []string{ + // Fields tagged "readonly" should not be emitted in the schema as they are + // computed at runtime, and should not be assigned a value by the bundle author. + "readonly", + + // Annotation for internal bundle fields that should not be exposed to customers. + // Fields can be tagged as "internal" to remove them from the generated schema. + "internal", + + // Annotation for bundle fields that have been deprecated. + // Fields tagged as "deprecated" are omitted from the generated schema. + "deprecated", +} + +type constructor struct { + // Map of typ.PkgPath() + "." + typ.Name() to the schema for that type. + // Example key: github.com/databricks/databricks-sdk-go/service/jobs.JobSettings + definitions map[string]Schema + + // Map of typ.PkgPath() + "." + typ.Name() to the corresponding type. Used to + // track types that have been seen to avoid infinite recursion. + seen map[string]reflect.Type + + // The root type for which the schema is being generated. + root reflect.Type +} + +// JSON pointers use "/" as a delimiter to represent nested objects. This means +// we would instead need to use "~1" to represent "/" if we wish to refer to a +// key in a JSON object with a "/" in it. Instead of doing that we replace "/" with an +// additional level of nesting in the output map. Thus the $refs in the generated +// JSON schema can contain "/" without any issues. +// see: https://datatracker.ietf.org/doc/html/rfc6901 +// +// For example: +// {"a/b/c": "value"} is converted to {"a": {"b": {"c": "value"}}} +// the $ref for "value" would be "#/$defs/a/b/c" in the generated JSON schema. +func (c *constructor) Definitions() map[string]any { + defs := maps.Clone(c.definitions) + + // Remove the root type from the definitions. We don't need to include it in + // the definitions because it will be inlined as the root of the generated JSON schema. + delete(defs, typePath(c.root)) + + if len(defs) == 0 { + return nil + } + + res := make(map[string]any) + for k, v := range defs { + parts := strings.Split(k, "/") + cur := res + for i, p := range parts { + // Set the value for the last part. + if i == len(parts)-1 { + cur[p] = v + break + } + + // For all but the last part, create a new map value to add a level + // of nesting. + if _, ok := cur[p]; !ok { + cur[p] = make(map[string]any) + } + cur = cur[p].(map[string]any) + } + } + + return res +} + +// FromType converts a [reflect.Type] to a [Schema]. Nodes in the final JSON +// schema are guaranteed to be one level deep, which is done using defining $defs +// for every Go type and referring them using $ref in the corresponding node in +// the JSON schema. +// +// fns is a list of transformation functions that will be applied in order to all $defs +// in the schema. +func FromType(typ reflect.Type, fns []func(typ reflect.Type, s Schema) Schema) (Schema, error) { + c := constructor{ + definitions: make(map[string]Schema), + seen: make(map[string]reflect.Type), + root: typ, + } + + _, err := c.walk(typ) + if err != nil { + return Schema{}, err + } + + for _, fn := range fns { + for k := range c.definitions { + c.definitions[k] = fn(c.seen[k], c.definitions[k]) + } + } + + res := c.definitions[typePath(typ)] + res.Definitions = c.Definitions() + return res, nil +} + +// typePath computes a unique string representation of the type. $ref in the generated +// JSON schema will refer to this path. See TestTypePath for examples outputs. +func typePath(typ reflect.Type) string { + // Pointers have a typ.Name() of "". Dereference them to get the underlying type. + for typ.Kind() == reflect.Ptr { + typ = typ.Elem() + } + + if typ.Kind() == reflect.Interface { + return "interface" + } + + // Recursively call typePath, to handle slices of slices / maps. + if typ.Kind() == reflect.Slice { + return path.Join("slice", typePath(typ.Elem())) + } + + if typ.Kind() == reflect.Map { + if typ.Key().Kind() != reflect.String { + panic(fmt.Sprintf("found map with non-string key: %v", typ.Key())) + } + + // Recursively call typePath, to handle maps of maps / slices. + return path.Join("map", typePath(typ.Elem())) + } + + switch { + case typ.PkgPath() != "" && typ.Name() != "": + return typ.PkgPath() + "." + typ.Name() + case typ.Name() != "": + return typ.Name() + default: + // Invariant. This function should return a non-empty string + // for all types. + panic("unexpected empty type name for type: " + typ.String()) + } +} + +// Walk the Go type, generating $defs for every type encountered, and populating +// the corresponding $ref in the JSON schema. +func (c *constructor) walk(typ reflect.Type) (string, error) { + // Dereference pointers if necessary. + for typ.Kind() == reflect.Ptr { + typ = typ.Elem() + } + + typPath := typePath(typ) + + // Return early if the type has already been seen, to avoid infinite recursion. + if _, ok := c.seen[typPath]; ok { + return typPath, nil + } + c.seen[typPath] = typ + + var s Schema + var err error + + switch typ.Kind() { + case reflect.Struct: + s, err = c.fromTypeStruct(typ) + case reflect.Slice: + s, err = c.fromTypeSlice(typ) + case reflect.Map: + s, err = c.fromTypeMap(typ) + case reflect.String: + s = Schema{Type: StringType} + case reflect.Bool: + s = Schema{Type: BooleanType} + case reflect.Int, reflect.Int32, reflect.Int64: + s = Schema{Type: IntegerType} + case reflect.Float32, reflect.Float64: + s = Schema{Type: NumberType} + case reflect.Interface: + // We cannot determine the schema for fields of interface type just based + // on the type information. Thus we'll set the empty schema here and allow + // arbitrary values. + s = Schema{} + default: + return "", fmt.Errorf("unsupported type: %s", typ.Kind()) + } + if err != nil { + return "", err + } + + // Store the computed JSON schema for the type. + c.definitions[typPath] = s + return typPath, nil +} + +// This function returns all member fields of the provided type. +// If the type has embedded (aka anonymous) fields, this function traverses +// those in a breadth first manner +// +// BFS is important because we want the a field defined at a higher level embedded +// struct to be given preference over a field with the same name defined at a lower +// level embedded struct. For example see: TestHigherLevelEmbeddedFieldIsInSchema +func getStructFields(typ reflect.Type) []reflect.StructField { + fields := []reflect.StructField{} + bfsQueue := list.New() + + for i := 0; i < typ.NumField(); i++ { + bfsQueue.PushBack(typ.Field(i)) + } + for bfsQueue.Len() > 0 { + front := bfsQueue.Front() + field := front.Value.(reflect.StructField) + bfsQueue.Remove(front) + + if !field.Anonymous { + fields = append(fields, field) + continue + } + + fieldType := field.Type + + // Embedded types can only be struct{} or pointer to struct{}. Multiple + // levels of pointers are not allowed by the Go compiler. So we only + // dereference pointers once. + if fieldType.Kind() == reflect.Pointer { + fieldType = fieldType.Elem() + } + + for i := 0; i < fieldType.NumField(); i++ { + bfsQueue.PushBack(fieldType.Field(i)) + } + } + return fields +} + +func (c *constructor) fromTypeStruct(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Struct { + return Schema{}, fmt.Errorf("expected struct, got %s", typ.Kind()) + } + + res := Schema{ + Type: ObjectType, + Properties: make(map[string]*Schema), + Required: []string{}, + AdditionalProperties: false, + } + + structFields := getStructFields(typ) + for _, structField := range structFields { + bundleTags := strings.Split(structField.Tag.Get("bundle"), ",") + // Fields marked as "readonly", "internal" or "deprecated" are skipped + // while generating the schema + skip := false + for _, tag := range skipTags { + if slices.Contains(bundleTags, tag) { + skip = true + break + } + } + if skip { + continue + } + + jsonTags := strings.Split(structField.Tag.Get("json"), ",") + fieldName := jsonTags[0] + // Do not include fields in the schema that will not be serialized during + // JSON marshalling. + if fieldName == "" || fieldName == "-" || !structField.IsExported() { + continue + } + + // Skip property if it is already present in the schema. + // This can happen if the same field is defined multiple times across + // a tree of embedded structs. For example see: TestHigherLevelEmbeddedFieldIsInSchema + if _, ok := res.Properties[fieldName]; ok { + continue + } + + // "omitempty" tags in the Go SDK structs represent fields that not are + // required to be present in the API payload. Thus its absence in the + // tags list indicates that the field is required. + if !slices.Contains(jsonTags, "omitempty") { + res.Required = append(res.Required, fieldName) + } + + // Walk the fields of the struct. + typPath, err := c.walk(structField.Type) + if err != nil { + return Schema{}, err + } + + // For every property in the struct, add a $ref to the corresponding + // $defs block. + refPath := path.Join("#/$defs", typPath) + res.Properties[fieldName] = &Schema{ + Reference: &refPath, + } + } + + return res, nil +} + +func (c *constructor) fromTypeSlice(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Slice { + return Schema{}, fmt.Errorf("expected slice, got %s", typ.Kind()) + } + + res := Schema{ + Type: ArrayType, + } + + // Walk the slice element type. + typPath, err := c.walk(typ.Elem()) + if err != nil { + return Schema{}, err + } + + refPath := path.Join("#/$defs", typPath) + + // Add a $ref to the corresponding $defs block for the slice element type. + res.Items = &Schema{ + Reference: &refPath, + } + return res, nil +} + +func (c *constructor) fromTypeMap(typ reflect.Type) (Schema, error) { + if typ.Kind() != reflect.Map { + return Schema{}, fmt.Errorf("expected map, got %s", typ.Kind()) + } + + res := Schema{ + Type: ObjectType, + } + + // Walk the map value type. + typPath, err := c.walk(typ.Elem()) + if err != nil { + return Schema{}, err + } + + refPath := path.Join("#/$defs", typPath) + + // Add a $ref to the corresponding $defs block for the map value type. + res.AdditionalProperties = &Schema{ + Reference: &refPath, + } + return res, nil +} diff --git a/libs/jsonschema/from_type_test.go b/libs/jsonschema/from_type_test.go new file mode 100644 index 000000000..174ffad88 --- /dev/null +++ b/libs/jsonschema/from_type_test.go @@ -0,0 +1,521 @@ +package jsonschema + +import ( + "reflect" + "testing" + + "github.com/databricks/cli/libs/jsonschema/test_types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFromTypeBasic(t *testing.T) { + type myStruct struct { + S string `json:"s"` + I *int `json:"i,omitempty"` + V interface{} `json:"v,omitempty"` + TriplePointer ***int `json:"triple_pointer,omitempty"` + + // These fields should be ignored in the resulting schema. + NotAnnotated string + DashedTag string `json:"-"` + InternalTagged string `json:"internal_tagged" bundle:"internal"` + DeprecatedTagged string `json:"deprecated_tagged" bundle:"deprecated"` + ReadOnlyTagged string `json:"readonly_tagged" bundle:"readonly"` + } + + strRef := "#/$defs/string" + boolRef := "#/$defs/bool" + intRef := "#/$defs/int" + interfaceRef := "#/$defs/interface" + + tcases := []struct { + name string + typ reflect.Type + expected Schema + }{ + { + name: "int", + typ: reflect.TypeOf(int(0)), + expected: Schema{ + Type: "integer", + }, + }, + { + name: "string", + typ: reflect.TypeOf(string("")), + expected: Schema{ + Type: "string", + }, + }, + { + name: "bool", + typ: reflect.TypeOf(bool(true)), + expected: Schema{ + Type: "boolean", + }, + }, + { + name: "float64", + typ: reflect.TypeOf(float64(0)), + expected: Schema{ + Type: "number", + }, + }, + { + name: "struct", + typ: reflect.TypeOf(myStruct{}), + expected: Schema{ + Type: "object", + Definitions: map[string]any{ + "interface": Schema{}, + "string": Schema{ + Type: "string", + }, + "int": Schema{ + Type: "integer", + }, + }, + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + "i": { + Reference: &intRef, + }, + "v": { + Reference: &interfaceRef, + }, + "triple_pointer": { + Reference: &intRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + }, + }, + { + name: "slice", + typ: reflect.TypeOf([]bool{}), + expected: Schema{ + Type: "array", + Definitions: map[string]any{ + "bool": Schema{ + Type: "boolean", + }, + }, + Items: &Schema{ + Reference: &boolRef, + }, + }, + }, + { + name: "map", + typ: reflect.TypeOf(map[string]int{}), + expected: Schema{ + Type: "object", + Definitions: map[string]any{ + "int": Schema{ + Type: "integer", + }, + }, + AdditionalProperties: &Schema{ + Reference: &intRef, + }, + }, + }, + } + + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + s, err := FromType(tc.typ, nil) + assert.NoError(t, err) + assert.Equal(t, tc.expected, s) + }) + } +} + +func TestGetStructFields(t *testing.T) { + type InnerEmbeddedStruct struct { + InnerField float64 + } + + type EmbeddedStructOne struct { + FieldOne int + + *InnerEmbeddedStruct + } + + type EmbeddedStructTwo struct { + FieldTwo bool + } + + type MyStruct struct { + *EmbeddedStructOne + EmbeddedStructTwo + + OuterField string + } + + fields := getStructFields(reflect.TypeOf(MyStruct{})) + assert.Len(t, fields, 4) + assert.Equal(t, "OuterField", fields[0].Name) + assert.Equal(t, "FieldOne", fields[1].Name) + + // InnerField occurring after FieldTwo ensures BFS as opposed to DFS traversal. + assert.Equal(t, "FieldTwo", fields[2].Name) + assert.Equal(t, "InnerField", fields[3].Name) +} + +func TestHigherLevelEmbeddedFieldIsInSchema(t *testing.T) { + type Inner struct { + Override string `json:"override,omitempty"` + } + + type EmbeddedOne struct { + Inner + } + + type EmbeddedTwo struct { + Override int `json:"override,omitempty"` + } + + type Outer struct { + EmbeddedOne + EmbeddedTwo + } + + intRef := "#/$defs/int" + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "int": Schema{ + Type: "integer", + }, + }, + Properties: map[string]*Schema{ + "override": { + Reference: &intRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + } + + s, err := FromType(reflect.TypeOf(Outer{}), nil) + require.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeNested(t *testing.T) { + type Inner struct { + S string `json:"s"` + } + + type Outer struct { + I string `json:"i"` + Inner Inner `json:"inner"` + } + + innerRef := "#/$defs/github.com/databricks/cli/libs/jsonschema.Inner" + strRef := "#/$defs/string" + + expectedDefinitions := map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema.Inner": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + }, + }, + }, + }, + }, + "string": Schema{ + Type: "string", + }, + } + + tcases := []struct { + name string + typ reflect.Type + expected Schema + }{ + { + name: "struct in struct", + typ: reflect.TypeOf(Outer{}), + expected: Schema{ + Type: "object", + Definitions: expectedDefinitions, + Properties: map[string]*Schema{ + "i": { + Reference: &strRef, + }, + "inner": { + Reference: &innerRef, + }, + }, + AdditionalProperties: false, + Required: []string{"i", "inner"}, + }, + }, + { + name: "struct as a map value", + typ: reflect.TypeOf(map[string]*Inner{}), + expected: Schema{ + Type: "object", + Definitions: expectedDefinitions, + AdditionalProperties: &Schema{ + Reference: &innerRef, + }, + }, + }, + { + name: "struct as a slice element", + typ: reflect.TypeOf([]Inner{}), + expected: Schema{ + Type: "array", + Definitions: expectedDefinitions, + Items: &Schema{ + Reference: &innerRef, + }, + }, + }, + } + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + s, err := FromType(tc.typ, nil) + assert.NoError(t, err) + assert.Equal(t, tc.expected, s) + }) + } +} + +func TestFromTypeRecursive(t *testing.T) { + fooRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Foo" + barRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Bar" + + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema": map[string]any{ + "test_types.Bar": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "foo": { + Reference: &fooRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + "test_types.Foo": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "bar": { + Reference: &barRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + }, + }, + }, + }, + }, + }, + Properties: map[string]*Schema{ + "foo": { + Reference: &fooRef, + }, + }, + AdditionalProperties: false, + Required: []string{"foo"}, + } + + s, err := FromType(reflect.TypeOf(test_types.Outer{}), nil) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeSelfReferential(t *testing.T) { + selfRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Self" + stringRef := "#/$defs/string" + + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "github.com": map[string]any{ + "databricks": map[string]any{ + "cli": map[string]any{ + "libs": map[string]any{ + "jsonschema": map[string]any{ + "test_types.Self": Schema{ + Type: "object", + Properties: map[string]*Schema{ + "self": { + Reference: &selfRef, + }, + "s": { + Reference: &stringRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + }, + }, + }, + }, + }, + }, + "string": Schema{ + Type: "string", + }, + }, + Properties: map[string]*Schema{ + "self": { + Reference: &selfRef, + }, + }, + AdditionalProperties: false, + Required: []string{}, + } + + s, err := FromType(reflect.TypeOf(test_types.OuterSelf{}), nil) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestFromTypeError(t *testing.T) { + // Maps with non-string keys should panic. + type mapOfInts map[int]int + assert.PanicsWithValue(t, "found map with non-string key: int", func() { + FromType(reflect.TypeOf(mapOfInts{}), nil) + }) + + // Unsupported types should return an error. + _, err := FromType(reflect.TypeOf(complex64(0)), nil) + assert.EqualError(t, err, "unsupported type: complex64") +} + +func TestFromTypeFunctionsArg(t *testing.T) { + type myStruct struct { + S string `json:"s"` + } + + strRef := "#/$defs/string" + expected := Schema{ + Type: "object", + Definitions: map[string]any{ + "string": Schema{ + Type: "string", + Description: "a string", + Enum: []any{"a", "b", "c"}, + }, + }, + Properties: map[string]*Schema{ + "s": { + Reference: &strRef, + }, + }, + AdditionalProperties: false, + Required: []string{"s"}, + } + + addDescription := func(typ reflect.Type, s Schema) Schema { + if typ.Kind() != reflect.String { + return s + } + s.Description = "a string" + return s + } + + addEnums := func(typ reflect.Type, s Schema) Schema { + if typ.Kind() != reflect.String { + return s + } + s.Enum = []any{"a", "b", "c"} + return s + } + + s, err := FromType(reflect.TypeOf(myStruct{}), []func(reflect.Type, Schema) Schema{ + addDescription, + addEnums, + }) + assert.NoError(t, err) + assert.Equal(t, expected, s) +} + +func TestTypePath(t *testing.T) { + type myStruct struct{} + + tcases := []struct { + typ reflect.Type + path string + }{ + { + typ: reflect.TypeOf(""), + path: "string", + }, + { + typ: reflect.TypeOf(int(0)), + path: "int", + }, + { + typ: reflect.TypeOf(true), + path: "bool", + }, + { + typ: reflect.TypeOf(float64(0)), + path: "float64", + }, + { + typ: reflect.TypeOf(myStruct{}), + path: "github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf([]int{}), + path: "slice/int", + }, + { + typ: reflect.TypeOf(map[string]int{}), + path: "map/int", + }, + { + typ: reflect.TypeOf([]myStruct{}), + path: "slice/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf([][]map[string]map[string]myStruct{}), + path: "slice/slice/map/map/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + { + typ: reflect.TypeOf(map[string]myStruct{}), + path: "map/github.com/databricks/cli/libs/jsonschema.myStruct", + }, + } + + for _, tc := range tcases { + t.Run(tc.typ.String(), func(t *testing.T) { + assert.Equal(t, tc.path, typePath(tc.typ)) + }) + } + + // Maps with non-string keys should panic. + assert.PanicsWithValue(t, "found map with non-string key: int", func() { + typePath(reflect.TypeOf(map[int]int{})) + }) +} diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index f1e223ec7..7690ec2f7 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -6,7 +6,6 @@ import ( "os" "regexp" "slices" - "strings" "github.com/databricks/cli/internal/build" "golang.org/x/mod/semver" @@ -14,6 +13,10 @@ import ( // defines schema for a json object type Schema struct { + // Definitions that can be reused and referenced throughout the schema. The + // syntax for a reference is $ref: #/$defs/ + Definitions map[string]any `json:"$defs,omitempty"` + // Type of the object Type Type `json:"type,omitempty"` @@ -63,7 +66,7 @@ type Schema struct { Extension // Schema that must match any of the schemas in the array - AnyOf []*Schema `json:"anyOf,omitempty"` + AnyOf []Schema `json:"anyOf,omitempty"` } // Default value defined in a JSON Schema, represented as a string. @@ -82,41 +85,6 @@ func (s *Schema) ParseString(v string) (any, error) { return fromString(v, s.Type) } -func (s *Schema) getByPath(path string) (*Schema, error) { - p := strings.Split(path, ".") - - res := s - for _, node := range p { - if node == "*" { - res = res.AdditionalProperties.(*Schema) - continue - } - var ok bool - res, ok = res.Properties[node] - if !ok { - return nil, fmt.Errorf("property %q not found in schema. Query path: %s", node, path) - } - } - return res, nil -} - -func (s *Schema) GetByPath(path string) (Schema, error) { - v, err := s.getByPath(path) - if err != nil { - return Schema{}, err - } - return *v, nil -} - -func (s *Schema) SetByPath(path string, v Schema) error { - dst, err := s.getByPath(path) - if err != nil { - return err - } - *dst = v - return nil -} - type Type string const ( diff --git a/libs/jsonschema/schema_test.go b/libs/jsonschema/schema_test.go index c365cf235..cf1f12767 100644 --- a/libs/jsonschema/schema_test.go +++ b/libs/jsonschema/schema_test.go @@ -4,7 +4,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestSchemaValidateTypeNames(t *testing.T) { @@ -306,92 +305,3 @@ func TestValidateSchemaSkippedPropertiesHaveDefaults(t *testing.T) { err = s.validate() assert.NoError(t, err) } - -func testSchema() *Schema { - return &Schema{ - Type: "object", - Properties: map[string]*Schema{ - "int_val": { - Type: "integer", - Default: int64(123), - }, - "string_val": { - Type: "string", - }, - "object_val": { - Type: "object", - Properties: map[string]*Schema{ - "bar": { - Type: "string", - Default: "baz", - }, - }, - AdditionalProperties: &Schema{ - Type: "object", - Properties: map[string]*Schema{ - "foo": { - Type: "string", - Default: "zab", - }, - }, - }, - }, - }, - } - -} - -func TestSchemaGetByPath(t *testing.T) { - s := testSchema() - - ss, err := s.GetByPath("int_val") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: IntegerType, - Default: int64(123), - }, ss) - - ss, err = s.GetByPath("string_val") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - }, ss) - - ss, err = s.GetByPath("object_val.bar") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "baz", - }, ss) - - ss, err = s.GetByPath("object_val.*.foo") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "zab", - }, ss) -} - -func TestSchemaSetByPath(t *testing.T) { - s := testSchema() - - err := s.SetByPath("int_val", Schema{ - Type: IntegerType, - Default: int64(456), - }) - require.NoError(t, err) - assert.Equal(t, int64(456), s.Properties["int_val"].Default) - - err = s.SetByPath("object_val.*.foo", Schema{ - Type: StringType, - Default: "zooby", - }) - require.NoError(t, err) - - ns, err := s.GetByPath("object_val.*.foo") - require.NoError(t, err) - assert.Equal(t, Schema{ - Type: StringType, - Default: "zooby", - }, ns) -} diff --git a/libs/jsonschema/test_types/test_types.go b/libs/jsonschema/test_types/test_types.go new file mode 100644 index 000000000..75e81595a --- /dev/null +++ b/libs/jsonschema/test_types/test_types.go @@ -0,0 +1,25 @@ +package test_types + +// Recursive types cannot be defined inline without making them anonymous, +// so we define them here instead. +type Foo struct { + Bar *Bar `json:"bar,omitempty"` +} + +type Bar struct { + Foo Foo `json:"foo,omitempty"` +} + +type Outer struct { + Foo Foo `json:"foo"` +} + +type Self struct { + Self *Self `json:"self,omitempty"` + + S string `json:"s,omitempty"` +} + +type OuterSelf struct { + Self Self `json:"self,omitempty"` +} diff --git a/cmd/sync/output.go b/libs/sync/output.go similarity index 83% rename from cmd/sync/output.go rename to libs/sync/output.go index 2785343f9..c01b25ef6 100644 --- a/cmd/sync/output.go +++ b/libs/sync/output.go @@ -5,12 +5,10 @@ import ( "context" "encoding/json" "io" - - "github.com/databricks/cli/libs/sync" ) // Read synchronization events and write them as JSON to the specified writer (typically stdout). -func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func JsonOutput(ctx context.Context, ch <-chan Event, w io.Writer) { enc := json.NewEncoder(w) for { select { @@ -31,7 +29,7 @@ func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { } // Read synchronization events and write them as text to the specified writer (typically stdout). -func textOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func TextOutput(ctx context.Context, ch <-chan Event, w io.Writer) { bw := bufio.NewWriter(w) for { diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 9eaebf2ad..cc9c73944 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -3,6 +3,7 @@ package sync import ( "context" "fmt" + stdsync "sync" "time" "github.com/databricks/cli/libs/filer" @@ -15,6 +16,8 @@ import ( "github.com/databricks/databricks-sdk-go/service/iam" ) +type OutputHandler func(context.Context, <-chan Event) + type SyncOptions struct { LocalRoot vfs.Path Paths []string @@ -34,6 +37,8 @@ type SyncOptions struct { CurrentUser *iam.User Host string + + OutputHandler OutputHandler } type Sync struct { @@ -49,6 +54,10 @@ type Sync struct { // Synchronization progress events are sent to this event notifier. notifier EventNotifier seq int + + // WaitGroup is automatically created when an output handler is provided in the SyncOptions. + // Close call is required to ensure the output handler goroutine handles all events in time. + outputWaitGroup *stdsync.WaitGroup } // New initializes and returns a new [Sync] instance. @@ -106,31 +115,41 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { return nil, err } + var notifier EventNotifier + var outputWaitGroup = &stdsync.WaitGroup{} + if opts.OutputHandler != nil { + ch := make(chan Event, MaxRequestsInFlight) + notifier = &ChannelNotifier{ch} + outputWaitGroup.Add(1) + go func() { + defer outputWaitGroup.Done() + opts.OutputHandler(ctx, ch) + }() + } else { + notifier = &NopNotifier{} + } + return &Sync{ SyncOptions: &opts, - fileSet: fileSet, - includeFileSet: includeFileSet, - excludeFileSet: excludeFileSet, - snapshot: snapshot, - filer: filer, - notifier: &NopNotifier{}, - seq: 0, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + snapshot: snapshot, + filer: filer, + notifier: notifier, + outputWaitGroup: outputWaitGroup, + seq: 0, }, nil } -func (s *Sync) Events() <-chan Event { - ch := make(chan Event, MaxRequestsInFlight) - s.notifier = &ChannelNotifier{ch} - return ch -} - func (s *Sync) Close() { if s.notifier == nil { return } s.notifier.Close() s.notifier = nil + s.outputWaitGroup.Wait() } func (s *Sync) notifyStart(ctx context.Context, d diff) { diff --git a/libs/template/config_test.go b/libs/template/config_test.go index 73b47f289..ab9dbeb5f 100644 --- a/libs/template/config_test.go +++ b/libs/template/config_test.go @@ -461,7 +461,7 @@ func TestPromptIsSkippedAnyOf(t *testing.T) { Default: "hello-world", Extension: jsonschema.Extension{ SkipPromptIf: &jsonschema.Schema{ - AnyOf: []*jsonschema.Schema{ + AnyOf: []jsonschema.Schema{ { Properties: map[string]*jsonschema.Schema{ "abc": { diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 1dfe74d73..88c73cc47 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -119,7 +119,7 @@ func loadHelpers(ctx context.Context) template.FuncMap { return "", err } } - return auth.GetShortUserName(cachedUser.UserName), nil + return auth.GetShortUserName(cachedUser), nil }, // Get the default workspace catalog. If there is no default, or if // Unity Catalog is not enabled, return an empty string. diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl index dbf8a8d85..cd4c29a76 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl @@ -121,7 +121,7 @@ You can find that job by opening your workpace and clicking on **Workflows**. You can also deploy to your production target directly from the command-line. The warehouse, catalog, and schema for that target are configured in databricks.yml. -When deploying to this target, note that the default job at resources/{{.project_name}}_job.yml +When deploying to this target, note that the default job at resources/{{.project_name}}.job.yml has a schedule set that runs every day. The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl similarity index 88% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl rename to libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index bad12c755..e23c8dbce 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -3,10 +3,11 @@ resources: {{.project_name}}_job: name: {{.project_name}}_job - schedule: - # Run every day at 9:27 AM - quartz_cron_expression: 21 27 9 * * ? - timezone_id: UTC + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS email_notifications: on_failure: diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl index a919a269c..69b769cde 100644 --- a/libs/template/templates/default-python/template/__preamble.tmpl +++ b/libs/template/templates/default-python/template/__preamble.tmpl @@ -18,7 +18,7 @@ This file only template directives; it is skipped for the actual output. {{if $notDLT}} {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.yml"}} + {{skip "{{.project_name}}/resources/{{.project_name}}.pipeline.yml"}} {{end}} {{if $notNotebook}} @@ -26,7 +26,7 @@ This file only template directives; it is skipped for the actual output. {{end}} {{if (and $notDLT $notNotebook $notPython)}} - {{skip "{{.project_name}}/resources/{{.project_name}}_job.yml"}} + {{skip "{{.project_name}}/resources/{{.project_name}}.job.yml"}} {{else}} {{skip "{{.project_name}}/resources/.gitkeep"}} {{end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 5adade0b3..53847a9c9 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -29,7 +29,7 @@ The '{{.project_name}}' project was generated by using the default-python templa ``` Note that the default job from the template has a schedule that runs every day - (defined in resources/{{.project_name}}_job.yml). The schedule + (defined in resources/{{.project_name}}.job.yml). The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl similarity index 88% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index dc79e3a17..5211e3894 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -10,10 +10,11 @@ resources: {{.project_name}}_job: name: {{.project_name}}_job - schedule: - # Run every day at 8:37 AM - quartz_cron_expression: '44 37 8 * * ?' - timezone_id: Europe/Amsterdam + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS {{- if not is_service_principal}} @@ -39,7 +40,7 @@ resources: - task_key: notebook_task {{- end}} pipeline_task: - {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}_pipeline.yml */}} + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} pipeline_id: ${resources.pipelines.{{.project_name}}_pipeline.id} {{end -}} {{- if (eq .include_python "yes") }} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl similarity index 61% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 4b8f74d17..bf4690461 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -3,6 +3,12 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline + {{- if eq default_catalog ""}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + {{- else}} + catalog: {{default_catalog}} + {{- end}} target: {{.project_name}}_${bundle.environment} libraries: - notebook: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index b152e9a30..253ed321c 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# DLT pipeline\n", "\n", - "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}_pipeline.yml." + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." ] }, { diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index a228f8d18..6782a053b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." ] }, { diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql.job.yml.tmpl similarity index 80% rename from libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl rename to libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql.job.yml.tmpl index 31d2d21a9..4e6803da9 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql.job.yml.tmpl @@ -4,10 +4,11 @@ resources: {{.project_name}}_sql_job: name: {{.project_name}}_sql_job - schedule: - # Run every day at 7:17 AM - quartz_cron_expression: '44 17 7 * * ?' - timezone_id: Europe/Amsterdam + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS {{- if not is_service_principal}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index e5ceb77a9..444ae4e03 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) +-- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) USE CATALOG {{"{{"}}catalog{{"}}"}}; USE IDENTIFIER({{"{{"}}schema{{"}}"}}); diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl index c73606ef1..80f6773cb 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) +-- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) -- -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html