diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 8622b29ca..562b72fcc 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -a6a317df8327c9b1e5cb59a03a42ffa2aabeef6d \ No newline at end of file +99f644e72261ef5ecf8d74db20f4b7a1e09723cc \ No newline at end of file diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index ee2c7b0fd..33833dfa1 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -109,16 +109,19 @@ var {{.CamelName}}Overrides []func( {{- end }} ) +{{- $excludeFromJson := list "http-request"}} + func new{{.PascalName}}() *cobra.Command { cmd := &cobra.Command{} + {{- $canUseJson := and .CanUseJson (not (in $excludeFromJson .KebabName )) -}} {{- if .Request}} var {{.CamelName}}Req {{.Service.Package.Name}}.{{.Request.PascalName}} {{- if .RequestBodyField }} {{.CamelName}}Req.{{.RequestBodyField.PascalName}} = &{{.Service.Package.Name}}.{{.RequestBodyField.Entity.PascalName}}{} {{- end }} - {{- if .CanUseJson}} + {{- if $canUseJson}} var {{.CamelName}}Json flags.JsonFlag {{- end}} {{- end}} @@ -135,14 +138,14 @@ func new{{.PascalName}}() *cobra.Command { {{- $request = .RequestBodyField.Entity -}} {{- end -}} {{if $request }}// TODO: short flags - {{- if .CanUseJson}} + {{- if $canUseJson}} cmd.Flags().Var(&{{.CamelName}}Json, "json", `either inline JSON string or @path/to/file.json with request body`) {{- end}} {{$method := .}} {{ if not .IsJsonOnly }} - {{range $request.Fields -}} + {{range .AllFields -}} {{- if not .Required -}} - {{if .Entity.IsObject }}// TODO: complex arg: {{.Name}} + {{if .Entity.IsObject}}{{if not (eq . $method.RequestBodyField) }}// TODO: complex arg: {{.Name}}{{end}} {{else if .Entity.IsAny }}// TODO: any: {{.Name}} {{else if .Entity.ArrayValue }}// TODO: array: {{.Name}} {{else if .Entity.MapValue }}// TODO: map via StringToStringVar: {{.Name}} @@ -176,8 +179,8 @@ func new{{.PascalName}}() *cobra.Command { {{- $wait := and .Wait (and (not .IsCrudRead) (not (eq .SnakeName "get_run"))) -}} {{- $hasRequiredArgs := and (not $hasIdPrompt) $hasPosArgs -}} {{- $hasSingleRequiredRequestBodyFieldWithPrompt := and (and $hasIdPrompt $request) (eq 1 (len $request.RequiredRequestBodyFields)) -}} - {{- $onlyPathArgsRequiredAsPositionalArguments := and $request (eq (len .RequiredPositionalArguments) (len $request.RequiredPathFields)) -}} - {{- $hasDifferentArgsWithJsonFlag := and (not $onlyPathArgsRequiredAsPositionalArguments) (and .CanUseJson (or $request.HasRequiredRequestBodyFields )) -}} + {{- $onlyPathArgsRequiredAsPositionalArguments := and .Request (eq (len .RequiredPositionalArguments) (len .Request.RequiredPathFields)) -}} + {{- $hasDifferentArgsWithJsonFlag := and (not $onlyPathArgsRequiredAsPositionalArguments) (and $canUseJson (or $request.HasRequiredRequestBodyFields )) -}} {{- $hasCustomArgHandler := or $hasRequiredArgs $hasDifferentArgsWithJsonFlag -}} {{- $atleastOneArgumentWithDescription := false -}} @@ -215,12 +218,12 @@ func new{{.PascalName}}() *cobra.Command { cmd.Args = func(cmd *cobra.Command, args []string) error { {{- if $hasDifferentArgsWithJsonFlag }} if cmd.Flags().Changed("json") { - err := root.ExactArgs({{len $request.RequiredPathFields}})(cmd, args) + err := root.ExactArgs({{len .Request.RequiredPathFields}})(cmd, args) if err != nil { - {{- if eq 0 (len $request.RequiredPathFields) }} + {{- if eq 0 (len .Request.RequiredPathFields) }} return fmt.Errorf("when --json flag is specified, no positional arguments are required. 
Provide{{- range $index, $field := $request.RequiredFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input") {{- else }} - return fmt.Errorf("when --json flag is specified, provide only{{- range $index, $field := $request.RequiredPathFields}}{{if $index}},{{end}} {{$field.ConstantName}}{{end}} as positional arguments. Provide{{- range $index, $field := $request.RequiredRequestBodyFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input") + return fmt.Errorf("when --json flag is specified, provide only{{- range $index, $field := .Request.RequiredPathFields}}{{if $index}},{{end}} {{$field.ConstantName}}{{end}} as positional arguments. Provide{{- range $index, $field := $request.RequiredRequestBodyFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input") {{- end }} } return nil @@ -239,7 +242,7 @@ func new{{.PascalName}}() *cobra.Command { ctx := cmd.Context() {{if .Service.IsAccounts}}a := root.AccountClient(ctx){{else}}w := root.WorkspaceClient(ctx){{end}} {{- if .Request }} - {{ if .CanUseJson }} + {{ if $canUseJson }} if cmd.Flags().Changed("json") { diags := {{.CamelName}}Json.Unmarshal(&{{.CamelName}}Req{{ if .RequestBodyField }}.{{.RequestBodyField.PascalName}}{{ end }}) if diags.HasError() { @@ -255,7 +258,7 @@ func new{{.PascalName}}() *cobra.Command { return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") }{{- end}} {{- if $hasPosArgs }} - {{- if and .CanUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} else { + {{- if and $canUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} else { {{- end}} {{- if $hasIdPrompt}} if len(args) == 0 { @@ -279,9 +282,9 @@ func new{{.PascalName}}() *cobra.Command { {{$method := .}} {{- range $arg, $field := .RequiredPositionalArguments}} - {{- template "args-scan" (dict "Arg" $arg "Field" $field "Method" $method "HasIdPrompt" $hasIdPrompt)}} + {{- template "args-scan" (dict "Arg" $arg "Field" $field "Method" $method "HasIdPrompt" $hasIdPrompt "ExcludeFromJson" $excludeFromJson)}} {{- end -}} - {{- if and .CanUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} + {{- if and $canUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} } {{- end}} @@ -392,7 +395,8 @@ func new{{.PascalName}}() *cobra.Command { {{- $method := .Method -}} {{- $arg := .Arg -}} {{- $hasIdPrompt := .HasIdPrompt -}} - {{- $optionalIfJsonIsUsed := and (not $hasIdPrompt) (and $field.IsRequestBodyField $method.CanUseJson) }} + {{ $canUseJson := and $method.CanUseJson (not (in .ExcludeFromJson $method.KebabName)) }} + {{- $optionalIfJsonIsUsed := and (not $hasIdPrompt) (and $field.IsRequestBodyField $canUseJson) }} {{- if $optionalIfJsonIsUsed }} if !cmd.Flags().Changed("json") { {{- end }} diff --git a/.gitattributes b/.gitattributes index 0a8ddf3cb..4b3715c93 100755 --- a/.gitattributes +++ b/.gitattributes @@ -1,11 +1,13 @@ cmd/account/access-control/access-control.go linguist-generated=true cmd/account/billable-usage/billable-usage.go linguist-generated=true +cmd/account/budget-policy/budget-policy.go linguist-generated=true cmd/account/budgets/budgets.go linguist-generated=true cmd/account/cmd.go linguist-generated=true cmd/account/credentials/credentials.go linguist-generated=true cmd/account/csp-enablement-account/csp-enablement-account.go linguist-generated=true cmd/account/custom-app-integration/custom-app-integration.go linguist-generated=true cmd/account/disable-legacy-features/disable-legacy-features.go linguist-generated=true 
+cmd/account/enable-ip-access-lists/enable-ip-access-lists.go linguist-generated=true cmd/account/encryption-keys/encryption-keys.go linguist-generated=true cmd/account/esm-enablement-account/esm-enablement-account.go linguist-generated=true cmd/account/federation-policy/federation-policy.go linguist-generated=true @@ -31,6 +33,7 @@ cmd/account/users/users.go linguist-generated=true cmd/account/vpc-endpoints/vpc-endpoints.go linguist-generated=true cmd/account/workspace-assignment/workspace-assignment.go linguist-generated=true cmd/account/workspaces/workspaces.go linguist-generated=true +cmd/workspace/access-control/access-control.go linguist-generated=true cmd/workspace/aibi-dashboard-embedding-access-policy/aibi-dashboard-embedding-access-policy.go linguist-generated=true cmd/workspace/aibi-dashboard-embedding-approved-domains/aibi-dashboard-embedding-approved-domains.go linguist-generated=true cmd/workspace/alerts-legacy/alerts-legacy.go linguist-generated=true @@ -74,6 +77,7 @@ cmd/workspace/instance-pools/instance-pools.go linguist-generated=true cmd/workspace/instance-profiles/instance-profiles.go linguist-generated=true cmd/workspace/ip-access-lists/ip-access-lists.go linguist-generated=true cmd/workspace/jobs/jobs.go linguist-generated=true +cmd/workspace/lakeview-embedded/lakeview-embedded.go linguist-generated=true cmd/workspace/lakeview/lakeview.go linguist-generated=true cmd/workspace/libraries/libraries.go linguist-generated=true cmd/workspace/metastores/metastores.go linguist-generated=true @@ -98,11 +102,13 @@ cmd/workspace/providers/providers.go linguist-generated=true cmd/workspace/quality-monitors/quality-monitors.go linguist-generated=true cmd/workspace/queries-legacy/queries-legacy.go linguist-generated=true cmd/workspace/queries/queries.go linguist-generated=true +cmd/workspace/query-execution/query-execution.go linguist-generated=true cmd/workspace/query-history/query-history.go linguist-generated=true cmd/workspace/query-visualizations-legacy/query-visualizations-legacy.go linguist-generated=true cmd/workspace/query-visualizations/query-visualizations.go linguist-generated=true cmd/workspace/recipient-activation/recipient-activation.go linguist-generated=true cmd/workspace/recipients/recipients.go linguist-generated=true +cmd/workspace/redash-config/redash-config.go linguist-generated=true cmd/workspace/registered-models/registered-models.go linguist-generated=true cmd/workspace/repos/repos.go linguist-generated=true cmd/workspace/resource-quotas/resource-quotas.go linguist-generated=true diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76835de7d..3c3895bc1 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,2 @@ * @pietern @andrewnester @shreyas-goenka @denik +cmd/labs @alexott @nfx diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index ea9558caf..fc764fb0d 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -18,7 +18,7 @@ jobs: pull-requests: write steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 + - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0 with: stale-issue-message: This issue has not received a response in a while. If you want to keep this issue open, please leave a comment below and auto-close will be canceled. stale-pr-message: This PR has not received an update in a while. 
If you want to keep this PR open, please leave a comment below or push a new commit and auto-close will be canceled. diff --git a/.github/workflows/integration-main.yml b/.github/workflows/integration-main.yml index 84dd7263a..f737c48e6 100644 --- a/.github/workflows/integration-main.yml +++ b/.github/workflows/integration-main.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 + uses: actions/create-github-app-token@136412a57a7081aa63c935a2cc2918f76c34f514 # v1.11.2 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/integration-pr.yml b/.github/workflows/integration-pr.yml index 7a62113cd..bf096c863 100644 --- a/.github/workflows/integration-pr.yml +++ b/.github/workflows/integration-pr.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 + uses: actions/create-github-app-token@136412a57a7081aa63c935a2cc2918f76c34f514 # v1.11.2 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/publish-winget.yml b/.github/workflows/publish-winget.yml index eb9a72eda..cbd24856b 100644 --- a/.github/workflows/publish-winget.yml +++ b/.github/workflows/publish-winget.yml @@ -10,19 +10,65 @@ on: jobs: publish-to-winget-pkgs: runs-on: - group: databricks-protected-runner-group - labels: windows-server-latest + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco environment: release steps: - - uses: vedantmgoyal2009/winget-releaser@93fd8b606a1672ec3e5c6c3bb19426be68d1a8b0 # v2 - with: - identifier: Databricks.DatabricksCLI - installers-regex: 'windows_.*-signed\.zip$' # Only signed Windows releases - token: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }} - fork-user: eng-dev-ecosystem-bot + - name: Checkout repository and submodules + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - # Use the tag from the input, or the ref name if the input is not provided. - # The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command. - release-tag: ${{ inputs.tag || github.ref_name }} + # When updating the version of komac, make sure to update the checksum in the next step. + # Find both at https://github.com/russellbanks/Komac/releases. + - name: Download komac binary + run: | + curl -s -L -o $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz https://github.com/russellbanks/Komac/releases/download/v2.9.0/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz + + - name: Verify komac binary + run: | + echo "d07a12831ad5418fee715488542a98ce3c0e591d05c850dd149fe78432be8c4c $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz" | sha256sum -c - + + - name: Untar komac binary to temporary path + run: | + mkdir -p $RUNNER_TEMP/komac + tar -xzf $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz -C $RUNNER_TEMP/komac + + - name: Add komac to PATH + run: echo "$RUNNER_TEMP/komac" >> $GITHUB_PATH + + - name: Confirm komac version + run: komac --version + + # Use the tag from the input, or the ref name if the input is not provided. + # The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command. 
+ - name: Strip "v" prefix from version + id: strip_version + run: echo "version=$(echo ${{ inputs.tag || github.ref_name }} | sed 's/^v//')" >> "$GITHUB_OUTPUT" + + - name: Get URLs of signed Windows binaries + id: get_windows_urls + run: | + urls=$( + gh api https://api.github.com/repos/databricks/cli/releases/tags/${{ inputs.tag || github.ref_name }} | \ + jq -r .assets[].browser_download_url | \ + grep -E '_windows_.*-signed\.zip$' | \ + tr '\n' ' ' + ) + if [ -z "$urls" ]; then + echo "No signed Windows binaries found" >&2 + exit 1 + fi + echo "urls=$urls" >> "$GITHUB_OUTPUT" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Publish to Winget + run: | + komac update Databricks.DatabricksCLI \ + --version ${{ steps.strip_version.outputs.version }} \ + --submit \ + --urls ${{ steps.get_windows_urls.outputs.urls }} \ + env: + KOMAC_FORK_OWNER: eng-dev-ecosystem-bot + GITHUB_TOKEN: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }} diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index d998224a4..c41afc18c 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -50,7 +50,7 @@ jobs: - name: Setup Go uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: - go-version: 1.23.4 + go-version-file: go.mod - name: Setup Python uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 @@ -71,10 +71,10 @@ jobs: make vendor pip3 install wheel - - name: Run tests - run: make test + - name: Run tests with coverage + run: make cover - golangci: + linters: needs: cleanups name: lint runs-on: ubuntu-latest @@ -82,7 +82,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: - go-version: 1.23.4 + go-version-file: go.mod # Use different schema from regular job, to avoid overwriting the same key cache-dependency-path: | go.sum @@ -95,10 +95,15 @@ jobs: # Exit with status code 1 if there are differences (i.e. unformatted files) git diff --exit-code - name: golangci-lint - uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 + uses: golangci/golangci-lint-action@ec5d18412c0aeab7936cb16880d708ba2a64e1ae # v6.2.0 with: version: v1.63.4 args: --timeout=15m + - name: Run ruff + uses: astral-sh/ruff-action@f14634c415d3e63ffd4d550a22f037df4c734a60 # v3.1.0 + with: + version: "0.9.1" + args: "format --check" validate-bundle-schema: needs: cleanups @@ -111,7 +116,7 @@ jobs: - name: Setup Go uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: - go-version: 1.23.4 + go-version-file: go.mod # Use different schema from regular job, to avoid overwriting the same key cache-dependency-path: | go.sum diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index 0592d09d0..9f2690e03 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -34,7 +34,7 @@ jobs: - name: Setup Go uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: - go-version: 1.23.4 + go-version-file: go.mod # The default cache key for this action considers only the `go.sum` file. 
# We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5d5811b19..fe5b4170b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,7 +26,7 @@ jobs: - name: Setup Go uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: - go-version: 1.23.4 + go-version-file: go.mod # The default cache key for this action considers only the `go.sum` file. # We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/.gitignore b/.gitignore index edd1409ae..2f6d0ad8e 100644 --- a/.gitignore +++ b/.gitignore @@ -20,14 +20,13 @@ dist/ *.log coverage.txt +coverage-acceptance.txt __pycache__ *.pyc -.terraform -.terraform.lock.hcl - +.idea .vscode/launch.json .vscode/tasks.json -.databricks +.ruff_cache diff --git a/.golangci.yaml b/.golangci.yaml index 07a6afdc5..8a83135ee 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -15,12 +15,20 @@ linters: - intrange - mirror - perfsprint + - unconvert linters-settings: govet: enable-all: true disable: - fieldalignment - shadow + settings: + printf: + funcs: + - (github.com/databricks/cli/internal/testutil.TestingT).Infof + - (github.com/databricks/cli/internal/testutil.TestingT).Errorf + - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf + - (github.com/databricks/cli/internal/testutil.TestingT).Skipf gofmt: rewrite-rules: - pattern: 'a[b:len(a)]' @@ -41,6 +49,8 @@ linters-settings: disable: # good check, but we have too many assert.(No)?Errorf? so excluding for now - require-error + copyloopvar: + check-alias: true issues: exclude-dirs-use-default: false # recommended by docs https://golangci-lint.run/usage/false-positives/ max-issues-per-linter: 1000 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b59fa540..fad9ce620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,154 @@ # Version changelog +## [Release] Release v0.242.0 + +Notable changes: +Starting with this version, the CLI does not load bundle auth information when a CLI command is executed inside the bundle directory with a profile explicitly provided via the `-p` flag. +For more details, see the related GitHub issue https://github.com/databricks/cli/issues/1358 + +CLI: + * Do not load host from bundle for CLI commands when profile flag is used ([#2335](https://github.com/databricks/cli/pull/2335)). + * Fixed accessing required path parameters in CLI generation when the --json flag is specified ([#2373](https://github.com/databricks/cli/pull/2373)). + +Bundles: + * Provide instructions for testing in the default-python template ([#2355](https://github.com/databricks/cli/pull/2355)). + * Remove `run_as` from the built-in templates ([#2044](https://github.com/databricks/cli/pull/2044)). + * Change warning about incomplete permissions section into a recommendation ([#2043](https://github.com/databricks/cli/pull/2043)). + * Refine `mode: production` diagnostic output ([#2236](https://github.com/databricks/cli/pull/2236)). + * Support serverless mode in default-python template (explicit prompt) ([#2377](https://github.com/databricks/cli/pull/2377)). + * Set default data_security_mode to "SINGLE_USER" in bundle templates ([#2372](https://github.com/databricks/cli/pull/2372)). + * Fixed spark version check for clusters defined in the same bundle ([#2374](https://github.com/databricks/cli/pull/2374)). + +API Changes: + * Added `databricks genie get-message-query-result-by-attachment` command. 
+ +OpenAPI commit 99f644e72261ef5ecf8d74db20f4b7a1e09723cc (2025-02-11) + +## [Release] Release v0.241.2 + +This is a bugfix release to address an issue where jobs with tasks with a +libraries section with PyPI packages could not be deployed. + +Bundles: + * Revert changes related to basename check for local libraries ([#2345](https://github.com/databricks/cli/pull/2345)). + +## [Release] Release v0.241.1 + +Bundles: + * Fix for regression deploying resources with PyPi and Maven library types ([#2341](https://github.com/databricks/cli/pull/2341)). + +## [Release] Release v0.241.0 + +Bundles: + * Added support to generate Git based jobs ([#2304](https://github.com/databricks/cli/pull/2304)). + * Added support for run_as in pipelines ([#2287](https://github.com/databricks/cli/pull/2287)). + * Raise an error when there are multiple local libraries with the same basename used ([#2297](https://github.com/databricks/cli/pull/2297)). + * Fix env variable for AzureCli local config ([#2248](https://github.com/databricks/cli/pull/2248)). + * Accept JSON files in includes section ([#2265](https://github.com/databricks/cli/pull/2265)). + * Always print warnings and errors; clean up format ([#2213](https://github.com/databricks/cli/pull/2213)) + +API Changes: + * Added `databricks account budget-policy` command group. + * Added `databricks lakeview-embedded` command group. + * Added `databricks query-execution` command group. + * Added `databricks account enable-ip-access-lists` command group. + * Added `databricks redash-config` command group. + +OpenAPI commit c72c58f97b950fcb924a90ef164bcb10cfcd5ece (2025-02-03) +Dependency updates: + * Upgrade to TF provider 1.65.1 ([#2328](https://github.com/databricks/cli/pull/2328)). + * Bump github.com/hashicorp/terraform-exec from 0.21.0 to 0.22.0 ([#2237](https://github.com/databricks/cli/pull/2237)). + * Bump github.com/spf13/pflag from 1.0.5 to 1.0.6 ([#2281](https://github.com/databricks/cli/pull/2281)). + * Bump github.com/databricks/databricks-sdk-go from 0.56.1 to 0.57.0 ([#2321](https://github.com/databricks/cli/pull/2321)). + * Bump golang.org/x/oauth2 from 0.25.0 to 0.26.0 ([#2322](https://github.com/databricks/cli/pull/2322)). + * Bump golang.org/x/term from 0.28.0 to 0.29.0 ([#2325](https://github.com/databricks/cli/pull/2325)). + * Bump golang.org/x/text from 0.21.0 to 0.22.0 ([#2323](https://github.com/databricks/cli/pull/2323)). + * Bump golang.org/x/mod from 0.22.0 to 0.23.0 ([#2324](https://github.com/databricks/cli/pull/2324)). + +## [Release] Release v0.240.0 + +Bundles: + * Added support for double underscore variable references ([#2203](https://github.com/databricks/cli/pull/2203)). + * Do not wait for app compute to start on `bundle deploy` ([#2144](https://github.com/databricks/cli/pull/2144)). + * Remove bundle.git.inferred ([#2258](https://github.com/databricks/cli/pull/2258)). + * libs/python: Remove DetectInterpreters ([#2234](https://github.com/databricks/cli/pull/2234)). + +API Changes: + * Added `databricks access-control` command group. + * Added `databricks serving-endpoints http-request` command. + * Changed `databricks serving-endpoints create` command with new required argument order. + * Changed `databricks serving-endpoints get-open-api` command return type to become non-empty. + * Changed `databricks recipients update` command return type to become non-empty. 
+ +OpenAPI commit 0be1b914249781b5e903b7676fd02255755bc851 (2025-01-22) +Dependency updates: + * Bump github.com/databricks/databricks-sdk-go from 0.55.0 to 0.56.1 ([#2238](https://github.com/databricks/cli/pull/2238)). + * Upgrade TF provider to 1.64.1 ([#2247](https://github.com/databricks/cli/pull/2247)). + +## [Release] Release v0.239.1 + +CLI: + * Added text output templates for apps list and list-deployments ([#2175](https://github.com/databricks/cli/pull/2175)). + * Fix duplicate "apps" entry in help output ([#2191](https://github.com/databricks/cli/pull/2191)). + +Bundles: + * Allow yaml-anchors in schema ([#2200](https://github.com/databricks/cli/pull/2200)). + * Show an error when non-yaml files used in include section ([#2201](https://github.com/databricks/cli/pull/2201)). + * Set WorktreeRoot to sync root outside git repo ([#2197](https://github.com/databricks/cli/pull/2197)). + * fix: Detailed message for using source-linked deployment with file_path specified ([#2119](https://github.com/databricks/cli/pull/2119)). + * Allow using variables in enum fields ([#2199](https://github.com/databricks/cli/pull/2199)). + * Add experimental-jobs-as-code template ([#2177](https://github.com/databricks/cli/pull/2177)). + * Reading variables from file ([#2171](https://github.com/databricks/cli/pull/2171)). + * Fixed an apps message order and added output test ([#2174](https://github.com/databricks/cli/pull/2174)). + * Default to forward slash-separated paths for path translation ([#2145](https://github.com/databricks/cli/pull/2145)). + * Include a materialized copy of built-in templates ([#2146](https://github.com/databricks/cli/pull/2146)). + + + +## [Release] Release v0.239.0 + +### New feature announcement + +#### Databricks Apps support + +You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration. +For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app + +#### Referencing complex variables in complex variables + +You can now reference complex variables within other complex variables. +For more details see https://github.com/databricks/cli/pull/2157 + +CLI: + * Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)). + * Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)). + +Bundles: + * Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)). + * Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)). + * Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)). + * Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)). + * Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)). + * Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)). + * Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)). + * Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)). + * Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)). 
+ * Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)). + +API Changes: + * Changed `databricks account federation-policy update` command with new required argument order. + * Changed `databricks account service-principal-federation-policy update` command with new required argument order. + +OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08) +Dependency updates: + * Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)). + * Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)). + * Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)). + * Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)). + * Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)). + * Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)). + * Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)). + ## [Release] Release v0.238.0 Bundles: diff --git a/Makefile b/Makefile index 2c84d88ba..0c3860e29 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,18 @@ -default: build +default: vendor fmt lint tidy PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... . GOTESTSUM_FORMAT ?= pkgname-and-test-fails +GOTESTSUM_CMD ?= gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped + lint: golangci-lint run --fix +tidy: + @# not part of golangci-lint, apparently + go mod tidy + lintcheck: golangci-lint run ./... @@ -14,17 +20,26 @@ lintcheck: # formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'. # If you need to ensure that formatting & imports are always fixed, do "make fmt lint" fmt: + ruff format -q golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./... test: - gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- ${PACKAGES} + ${GOTESTSUM_CMD} -- ${PACKAGES} cover: - gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- -coverprofile=coverage.txt ${PACKAGES} + rm -fr ./acceptance/build/cover/ + VERBOSE_TEST=1 CLI_GOCOVERDIR=build/cover ${GOTESTSUM_CMD} -- -coverprofile=coverage.txt ${PACKAGES} + rm -fr ./acceptance/build/cover-merged/ + mkdir -p acceptance/build/cover-merged/ + go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/ + go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt showcover: go tool cover -html=coverage.txt +acc-showcover: + go tool cover -html=coverage-acceptance.txt + build: vendor go build -mod vendor @@ -33,16 +48,19 @@ snapshot: vendor: go mod vendor - + schema: go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json -INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." -- -parallel 4 -timeout=2h +docs: + go run ./bundle/docsgen ./bundle/internal/schema ./bundle/docsgen -integration: +INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./acceptance ./integration/..." 
-- -parallel 4 -timeout=2h + +integration: vendor $(INTEGRATION) -integration-short: - $(INTEGRATION) -short +integration-short: vendor + VERBOSE_TEST=1 $(INTEGRATION) -short -.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short +.PHONY: lint tidy lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover docs diff --git a/NOTICE b/NOTICE index f6b59e0b0..0b1d2da04 100644 --- a/NOTICE +++ b/NOTICE @@ -105,3 +105,16 @@ License - https://github.com/wI2L/jsondiff/blob/master/LICENSE https://github.com/hexops/gotextdiff Copyright (c) 2009 The Go Authors. All rights reserved. License - https://github.com/hexops/gotextdiff/blob/main/LICENSE + +https://github.com/BurntSushi/toml +Copyright (c) 2013 TOML authors +https://github.com/BurntSushi/toml/blob/master/COPYING + +dario.cat/mergo +Copyright (c) 2013 Dario Castañé. All rights reserved. +Copyright (c) 2012 The Go Authors. All rights reserved. +https://github.com/darccio/mergo/blob/master/LICENSE + +https://github.com/gorilla/mux +Copyright (c) 2023 The Gorilla Authors. All rights reserved. +https://github.com/gorilla/mux/blob/main/LICENSE diff --git a/acceptance/.gitignore b/acceptance/.gitignore new file mode 100644 index 000000000..378eac25d --- /dev/null +++ b/acceptance/.gitignore @@ -0,0 +1 @@ +build diff --git a/acceptance/README.md b/acceptance/README.md index 42a37d253..75ac1d5fc 100644 --- a/acceptance/README.md +++ b/acceptance/README.md @@ -17,3 +17,5 @@ For more complex tests one can also use: - `errcode` helper: if the command fails with non-zero code, it appends `Exit code: N` to the output but returns success to caller (bash), allowing continuation of script. - `trace` helper: prints the arguments before executing the command. - custom output files: redirect output to custom file (it must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`. + +See [selftest](./selftest) for a toy test. diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index b9fb219dc..066a84299 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -1,31 +1,66 @@ package acceptance_test import ( + "context" + "encoding/json" "errors" + "flag" "fmt" "io" + "net/http" "os" "os/exec" "path/filepath" + "regexp" "runtime" "slices" "sort" "strings" "testing" "time" + "unicode/utf8" + + "github.com/google/uuid" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/cli/libs/testserver" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -var KeepTmp = os.Getenv("KEEP_TMP") != "" +var ( + KeepTmp bool + NoRepl bool + VerboseTest bool = os.Getenv("VERBOSE_TEST") != "" +) + +// In order to debug CLI running under acceptance test, set this to full subtest name, e.g. "bundle/variables/empty" +// Then install your breakpoints and click "debug test" near TestAccept in VSCODE. +// example: var SingleTest = "bundle/variables/empty" +var SingleTest = "" + +// If enabled, instead of compiling and running CLI externally, we'll start in-process server that accepts and runs +// CLI commands. The $CLI in test scripts is a helper that just forwards command-line arguments to this server (see bin/callserver.py). +// Also disables parallelism in tests. 
+var InprocessMode bool + +func init() { + flag.BoolVar(&InprocessMode, "inprocess", SingleTest != "", "Run CLI in the same process as test (for debugging)") + flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run") + flag.BoolVar(&NoRepl, "norepl", false, "Do not apply any replacements (for debugging)") +} const ( EntryPointScript = "script" CleanupScript = "script.cleanup" PrepareScript = "script.prepare" + MaxFileSize = 100_000 + // Filename to save replacements to (used by diff.py) + ReplsFile = "repls.json" ) var Scripts = map[string]bool{ @@ -34,38 +69,122 @@ var Scripts = map[string]bool{ PrepareScript: true, } +var Ignored = map[string]bool{ + ReplsFile: true, +} + func TestAccept(t *testing.T) { + testAccept(t, InprocessMode, SingleTest) +} + +func TestInprocessMode(t *testing.T) { + if InprocessMode { + t.Skip("Already tested by TestAccept") + } + require.Equal(t, 1, testAccept(t, true, "selftest/basic")) + require.Equal(t, 1, testAccept(t, true, "selftest/server")) +} + +func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { + repls := testdiff.ReplacementsContext{} cwd, err := os.Getwd() require.NoError(t, err) - execPath := BuildCLI(t, cwd) - // $CLI is what test scripts are using + buildDir := filepath.Join(cwd, "build", fmt.Sprintf("%s_%s", runtime.GOOS, runtime.GOARCH)) + + // Download terraform and provider and create config; this also creates build directory. + RunCommand(t, []string{"python3", filepath.Join(cwd, "install_terraform.py"), "--targetdir", buildDir}, ".") + + coverDir := os.Getenv("CLI_GOCOVERDIR") + + if coverDir != "" { + require.NoError(t, os.MkdirAll(coverDir, os.ModePerm)) + coverDir, err = filepath.Abs(coverDir) + require.NoError(t, err) + t.Logf("Writing coverage to %s", coverDir) + } + + execPath := "" + + if InprocessMode { + cmdServer := StartCmdServer(t) + t.Setenv("CMD_SERVER_URL", cmdServer.URL) + execPath = filepath.Join(cwd, "bin", "callserver.py") + } else { + execPath = BuildCLI(t, buildDir, coverDir) + } + t.Setenv("CLI", execPath) + repls.SetPath(execPath, "[CLI]") // Make helper scripts available t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH"))) - server := StartServer(t) - AddHandlers(server) - // Redirect API access to local server: - t.Setenv("DATABRICKS_HOST", fmt.Sprintf("http://127.0.0.1:%d", server.Port)) - t.Setenv("DATABRICKS_TOKEN", "dapi1234") + tempHomeDir := t.TempDir() + repls.SetPath(tempHomeDir, "[TMPHOME]") + t.Logf("$TMPHOME=%v", tempHomeDir) - homeDir := t.TempDir() - // Do not read user's ~/.databrickscfg - t.Setenv(env.HomeEnvVar(), homeDir) + // Make use of uv cache; since we set HomeEnvVar to temporary directory, it is not picked up automatically + uvCache := getUVDefaultCacheDir(t) + t.Setenv("UV_CACHE_DIR", uvCache) - repls := testdiff.ReplacementsContext{} - repls.Set(execPath, "$CLI") + cloudEnv := os.Getenv("CLOUD_ENV") + + if cloudEnv == "" { + defaultServer := testserver.New(t) + AddHandlers(defaultServer) + t.Setenv("DATABRICKS_DEFAULT_HOST", defaultServer.URL) + + homeDir := t.TempDir() + // Do not read user's ~/.databrickscfg + t.Setenv(env.HomeEnvVar(), homeDir) + } + + terraformrcPath := filepath.Join(buildDir, ".terraformrc") + t.Setenv("TF_CLI_CONFIG_FILE", terraformrcPath) + t.Setenv("DATABRICKS_TF_CLI_CONFIG_FILE", terraformrcPath) + repls.SetPath(terraformrcPath, "[DATABRICKS_TF_CLI_CONFIG_FILE]") + + terraformExecPath := filepath.Join(buildDir, "terraform") + if runtime.GOOS == "windows" { + 
terraformExecPath += ".exe" + } + t.Setenv("DATABRICKS_TF_EXEC_PATH", terraformExecPath) + t.Setenv("TERRAFORM", terraformExecPath) + repls.SetPath(terraformExecPath, "[TERRAFORM]") + + // do it last so that full paths match first: + repls.SetPath(buildDir, "[BUILD_DIR]") + + testdiff.PrepareReplacementsDevVersion(t, &repls) + testdiff.PrepareReplacementSdkVersion(t, &repls) + testdiff.PrepareReplacementsGoVersion(t, &repls) + + repls.SetPath(cwd, "[TESTROOT]") + + repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"}) testDirs := getTests(t) require.NotEmpty(t, testDirs) + + if singleTest != "" { + testDirs = slices.DeleteFunc(testDirs, func(n string) bool { + return n != singleTest + }) + require.NotEmpty(t, testDirs, "singleTest=%#v did not match any tests\n%#v", singleTest, testDirs) + } + for _, dir := range testDirs { t.Run(dir, func(t *testing.T) { - t.Parallel() - runTest(t, dir, repls) + if !InprocessMode { + t.Parallel() + } + + runTest(t, dir, coverDir, repls.Clone()) }) } + + return len(testDirs) } func getTests(t *testing.T) []string { @@ -78,7 +197,8 @@ func getTests(t *testing.T) []string { name := filepath.Base(path) if name == EntryPointScript { // Presence of 'script' marks a test case in this directory - testDirs = append(testDirs, filepath.Dir(path)) + testName := filepath.ToSlash(filepath.Dir(path)) + testDirs = append(testDirs, testName) } return nil }) @@ -88,7 +208,23 @@ func getTests(t *testing.T) []string { return testDirs } -func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { +func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) { + config, configPath := LoadConfig(t, dir) + + isEnabled, isPresent := config.GOOS[runtime.GOOS] + if isPresent && !isEnabled { + t.Skipf("Disabled via GOOS.%s setting in %s", runtime.GOOS, configPath) + } + + cloudEnv := os.Getenv("CLOUD_ENV") + if !isTruePtr(config.Local) && cloudEnv == "" { + t.Skipf("Disabled via Local setting in %s (CLOUD_ENV=%s)", configPath, cloudEnv) + } + + if !isTruePtr(config.Cloud) && cloudEnv != "" { + t.Skipf("Disabled via Cloud setting in %s (CLOUD_ENV=%s)", configPath, cloudEnv) + } + var tmpDir string var err error if KeepTmp { @@ -101,6 +237,8 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { tmpDir = t.TempDir() } + repls.SetPathWithParents(tmpDir, "[TMPDIR]") + scriptContents := readMergedScriptContents(t, dir) testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents) @@ -111,70 +249,214 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { args := []string{"bash", "-euo", "pipefail", EntryPointScript} cmd := exec.Command(args[0], args[1:]...) - cmd.Dir = tmpDir - outB, err := cmd.CombinedOutput() + cmd.Env = os.Environ() - out := formatOutput(string(outB), err) - out = repls.Replace(out) - doComparison(t, filepath.Join(dir, "output.txt"), "script output", out) + var workspaceClient *databricks.WorkspaceClient + var user iam.User - for key := range outputs { - if key == "output.txt" { - // handled above - continue - } - pathNew := filepath.Join(tmpDir, key) - newValBytes, err := os.ReadFile(pathNew) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir) - } else { - t.Errorf("%s: could not read: %s", key, err) + // Start a new server with a custom configuration if the acceptance test + // specifies a custom server stubs. 
+ var server *testserver.Server + + if cloudEnv == "" { + // Start a new server for this test if either: + // 1. A custom server spec is defined in the test configuration. + // 2. The test is configured to record requests and assert on them. We need + // a duplicate of the default server to record requests because the default + // server otherwise is a shared resource. + + databricksLocalHost := os.Getenv("DATABRICKS_DEFAULT_HOST") + + if len(config.Server) > 0 || isTruePtr(config.RecordRequests) { + server = testserver.New(t) + if isTruePtr(config.RecordRequests) { + requestsPath := filepath.Join(tmpDir, "out.requests.txt") + server.RecordRequestsCallback = func(request *testserver.Request) { + req := getLoggedRequest(request, config.IncludeRequestHeaders) + reqJson, err := json.MarshalIndent(req, "", " ") + assert.NoErrorf(t, err, "Failed to indent: %#v", req) + + reqJsonWithRepls := repls.Replace(string(reqJson)) + + f, err := os.OpenFile(requestsPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o644) + assert.NoError(t, err) + defer f.Close() + + _, err = f.WriteString(reqJsonWithRepls + "\n") + assert.NoError(t, err) + } } - continue + + // We want later stubs to take precedence, because then leaf configs take precedence over parent directory configs + // In gorilla/mux earlier handlers take precedence, so we need to reverse the order + slices.Reverse(config.Server) + + for _, stub := range config.Server { + require.NotEmpty(t, stub.Pattern) + items := strings.Split(stub.Pattern, " ") + require.Len(t, items, 2) + server.Handle(items[0], items[1], func(req testserver.Request) any { + return stub.Response + }) + } + + // The earliest handlers take precedence, so add default handlers last + AddHandlers(server) + databricksLocalHost = server.URL } - pathExpected := filepath.Join(dir, key) - newVal := repls.Replace(string(newValBytes)) - doComparison(t, pathExpected, pathNew, newVal) + + // Each local test should use a new token that will result in a new fake workspace, + // so that tests don't interfere with each other. + tokenSuffix := strings.ReplaceAll(uuid.NewString(), "-", "") + config := databricks.Config{ + Host: databricksLocalHost, + Token: "dbapi" + tokenSuffix, + } + workspaceClient, err = databricks.NewWorkspaceClient(&config) + require.NoError(t, err) + + cmd.Env = append(cmd.Env, "DATABRICKS_HOST="+config.Host) + cmd.Env = append(cmd.Env, "DATABRICKS_TOKEN="+config.Token) + + // For the purposes of replacements, use testUser. + // Note, users might have overridden /api/2.0/preview/scim/v2/Me but that should not affect the replacement: + user = testUser + } else { + // Use whatever authentication mechanism is configured by the test runner. + workspaceClient, err = databricks.NewWorkspaceClient(&databricks.Config{}) + require.NoError(t, err) + pUser, err := workspaceClient.CurrentUser.Me(context.Background()) + require.NoError(t, err, "Failed to get current user") + user = *pUser + } + + testdiff.PrepareReplacementsUser(t, &repls, user) + testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient) + + // Must be added after PrepareReplacementsUser, otherwise it conflicts with [USERNAME] + testdiff.PrepareReplacementsUUID(t, &repls) + + // User replacements come last: + repls.Repls = append(repls.Repls, config.Repls...)
+ + // Save replacements to temp test directory so that it can be read by diff.py + replsJson, err := json.MarshalIndent(repls.Repls, "", " ") + require.NoError(t, err) + testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) + + if coverDir != "" { + // Creating individual coverage directory for each test, because writing to the same one + // results in sporadic failures like this one (only if tests are running in parallel): + // +error: coverage meta-data emit failed: writing ... rename .../tmp.covmeta.b3f... .../covmeta.b3f2c...: no such file or directory + coverDir = filepath.Join(coverDir, strings.ReplaceAll(dir, string(os.PathSeparator), "--")) + err := os.MkdirAll(coverDir, os.ModePerm) + require.NoError(t, err) + cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir) + } + + absDir, err := filepath.Abs(dir) + require.NoError(t, err) + cmd.Env = append(cmd.Env, "TESTDIR="+absDir) + + // Write combined output to a file + out, err := os.Create(filepath.Join(tmpDir, "output.txt")) + require.NoError(t, err) + cmd.Stdout = out + cmd.Stderr = out + cmd.Dir = tmpDir + err = cmd.Run() + + // Include exit code in output (if non-zero) + formatOutput(out, err) + require.NoError(t, out.Close()) + + printedRepls := false + + // Compare expected outputs + for relPath := range outputs { + doComparison(t, repls, dir, tmpDir, relPath, &printedRepls) } // Make sure there are not unaccounted for new files - files, err := os.ReadDir(tmpDir) - require.NoError(t, err) - - for _, f := range files { - name := f.Name() - if _, ok := inputs[name]; ok { + files := ListDir(t, tmpDir) + unexpected := []string{} + for _, relPath := range files { + if _, ok := inputs[relPath]; ok { continue } - if _, ok := outputs[name]; ok { + if _, ok := outputs[relPath]; ok { continue } - t.Errorf("Unexpected output: %s", f) - if strings.HasPrefix(name, "out") { + if _, ok := Ignored[relPath]; ok { + continue + } + unexpected = append(unexpected, relPath) + if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" // Show the contents & support overwrite mode for it: - pathNew := filepath.Join(tmpDir, name) - newVal := testutil.ReadFile(t, pathNew) - newVal = repls.Replace(newVal) - doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal) + doComparison(t, repls, dir, tmpDir, relPath, &printedRepls) } } + + if len(unexpected) > 0 { + t.Error("Test produced unexpected files:\n" + strings.Join(unexpected, "\n")) + } } -func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { - valueNew = testdiff.NormalizeNewlines(valueNew) - valueExpected := string(readIfExists(t, pathExpected)) - valueExpected = testdiff.NormalizeNewlines(valueExpected) - testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew) - if testdiff.OverwriteMode { - if valueNew != "" { - t.Logf("Overwriting: %s", pathExpected) - testutil.WriteFile(t, pathExpected, valueNew) - } else { - t.Logf("Removing: %s", pathExpected) - _ = os.Remove(pathExpected) +func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string, printedRepls *bool) { + pathRef := filepath.Join(dirRef, relPath) + pathNew := filepath.Join(dirNew, relPath) + bufRef, okRef := tryReading(t, pathRef) + bufNew, okNew := tryReading(t, pathNew) + if !okRef && !okNew { + t.Errorf("Both files are missing or have errors: %s\npathRef: %s\npathNew: %s", relPath, pathRef, pathNew) + return + } + + valueRef := testdiff.NormalizeNewlines(bufRef) + valueNew := 
testdiff.NormalizeNewlines(bufNew) + + // Apply replacements to the new value only. + // The reference value is stored after applying replacements. + if !NoRepl { + valueNew = repls.Replace(valueNew) + } + + // The test did not produce an expected output file. + if okRef && !okNew { + t.Errorf("Missing output file: %s", relPath) + if testdiff.OverwriteMode { + t.Logf("Removing output file: %s", relPath) + require.NoError(t, os.Remove(pathRef)) } + return + } + + // The test produced an unexpected output file. + if !okRef && okNew { + t.Errorf("Unexpected output file: %s\npathRef: %s\npathNew: %s", relPath, pathRef, pathNew) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Writing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + return + } + + // Compare the reference and new values. + equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if !equal && testdiff.OverwriteMode { + t.Logf("Overwriting existing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + + if VerboseTest && !equal && printedRepls != nil && !*printedRepls { + *printedRepls = true + var items []string + for _, item := range repls.Repls { + items = append(items, fmt.Sprintf("REPL %s => %s", item.Old, item.New)) + } + t.Log("Available replacements:\n" + strings.Join(items, "\n")) } } @@ -182,18 +464,23 @@ func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { // Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir. func readMergedScriptContents(t *testing.T, dir string) string { scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript)) + + // Wrap script contents in a subshell such that changing the working + // directory only affects the main script and not cleanup. + scriptContents = "(\n" + scriptContents + ")\n" + prepares := []string{} cleanups := []string{} for { - x := readIfExists(t, filepath.Join(dir, CleanupScript)) - if len(x) > 0 { - cleanups = append(cleanups, string(x)) + x, ok := tryReading(t, filepath.Join(dir, CleanupScript)) + if ok { + cleanups = append(cleanups, x) } - x = readIfExists(t, filepath.Join(dir, PrepareScript)) - if len(x) > 0 { - prepares = append(prepares, string(x)) + x, ok = tryReading(t, filepath.Join(dir, PrepareScript)) + if ok { + prepares = append(prepares, x) } if dir == "" || dir == "." { @@ -210,28 +497,30 @@ func readMergedScriptContents(t *testing.T, dir string) string { return strings.Join(prepares, "\n") } -func BuildCLI(t *testing.T, cwd string) string { - execPath := filepath.Join(cwd, "build", "databricks") +func BuildCLI(t *testing.T, buildDir, coverDir string) string { + execPath := filepath.Join(buildDir, "databricks") if runtime.GOOS == "windows" { execPath += ".exe" } - start := time.Now() - args := []string{"go", "build", "-mod", "vendor", "-o", execPath} - cmd := exec.Command(args[0], args[1:]...) - cmd.Dir = ".." 
- out, err := cmd.CombinedOutput() - elapsed := time.Since(start) - t.Logf("%s took %s", args, elapsed) - require.NoError(t, err, "go build failed: %s: %s\n%s", args, err, out) - if len(out) > 0 { - t.Logf("go build output: %s: %s", args, out) + args := []string{ + "go", "build", + "-mod", "vendor", + "-o", execPath, } - // Quick check + warm up cache: - cmd = exec.Command(execPath, "--version") - out, err = cmd.CombinedOutput() - require.NoError(t, err, "%s --version failed: %s\n%s", execPath, err, out) + if coverDir != "" { + args = append(args, "-cover") + } + + if runtime.GOOS == "windows" { + // Get this error on my local Windows: + // error obtaining VCS status: exit status 128 + // Use -buildvcs=false to disable VCS stamping. + args = append(args, "-buildvcs=false") + } + + RunCommand(t, args, "..") return execPath } @@ -252,29 +541,45 @@ func copyFile(src, dst string) error { return err } -func formatOutput(out string, err error) string { +func formatOutput(w io.Writer, err error) { if err == nil { - return out + return } if exiterr, ok := err.(*exec.ExitError); ok { exitCode := exiterr.ExitCode() - out += fmt.Sprintf("\nExit code: %d\n", exitCode) + fmt.Fprintf(w, "\nExit code: %d\n", exitCode) } else { - out += fmt.Sprintf("\nError: %s\n", err) + fmt.Fprintf(w, "\nError: %s\n", err) } - return out } -func readIfExists(t *testing.T, path string) []byte { - data, err := os.ReadFile(path) - if err == nil { - return data +func tryReading(t *testing.T, path string) (string, bool) { + info, err := os.Stat(path) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + t.Errorf("%s: %s", path, err) + } + return "", false } - if !errors.Is(err, os.ErrNotExist) { - t.Fatalf("%s: %s", path, err) + if info.Size() > MaxFileSize { + t.Errorf("%s: ignoring, too large: %d", path, info.Size()) + return "", false } - return []byte{} + + data, err := os.ReadFile(path) + if err != nil { + // already checked ErrNotExist above + t.Errorf("%s: %s", path, err) + return "", false + } + + if !utf8.Valid(data) { + t.Errorf("%s: not valid utf-8", path) + return "", false + } + + return string(data), true } func CopyDir(src, dst string, inputs, outputs map[string]bool) error { @@ -289,8 +594,10 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return err } - if strings.HasPrefix(name, "out") { - outputs[relPath] = true + if strings.HasPrefix(relPath, "out") { + if !info.IsDir() { + outputs[relPath] = true + } return nil } else { inputs[relPath] = true @@ -309,3 +616,98 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return copyFile(path, destPath) }) } + +func ListDir(t *testing.T, src string) []string { + var files []string + err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + // Do not FailNow here. + // The output comparison is happening after this call which includes output.txt which + // includes errors printed by commands which include explanation why a given file cannot be read. 
+ t.Errorf("Error when listing %s: path=%s: %s", src, path, err) + return nil + } + + if info.IsDir() { + return nil + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + if err != nil { + t.Errorf("Failed to list %s: %s", src, err) + } + return files +} + +func getUVDefaultCacheDir(t *testing.T) string { + // According to uv docs https://docs.astral.sh/uv/concepts/cache/#caching-in-continuous-integration + // the default cache directory is + // "A system-appropriate cache directory, e.g., $XDG_CACHE_HOME/uv or $HOME/.cache/uv on Unix and %LOCALAPPDATA%\uv\cache on Windows" + cacheDir, err := os.UserCacheDir() + require.NoError(t, err) + if runtime.GOOS == "windows" { + return cacheDir + "\\uv\\cache" + } else { + return cacheDir + "/uv" + } +} + +func RunCommand(t *testing.T, args []string, dir string) { + start := time.Now() + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = dir + out, err := cmd.CombinedOutput() + elapsed := time.Since(start) + t.Logf("%s took %s", args, elapsed) + + require.NoError(t, err, "%s failed: %s\n%s", args, err, out) + if len(out) > 0 { + t.Logf("%s output: %s", args, out) + } +} + +type LoggedRequest struct { + Headers http.Header `json:"headers,omitempty"` + Method string `json:"method"` + Path string `json:"path"` + Body any `json:"body,omitempty"` + RawBody string `json:"raw_body,omitempty"` +} + +func getLoggedRequest(req *testserver.Request, includedHeaders []string) LoggedRequest { + result := LoggedRequest{ + Method: req.Method, + Path: req.URL.Path, + Headers: filterHeaders(req.Headers, includedHeaders), + } + + if json.Valid(req.Body) { + result.Body = json.RawMessage(req.Body) + } else { + result.RawBody = string(req.Body) + } + + return result +} + +func filterHeaders(h http.Header, includedHeaders []string) http.Header { + headers := make(http.Header) + for k, v := range h { + if !slices.Contains(includedHeaders, k) { + continue + } + headers[k] = v + } + return headers +} + +func isTruePtr(value *bool) bool { + return value != nil && *value +} diff --git a/acceptance/auth/bundle_and_profile/.databrickscfg b/acceptance/auth/bundle_and_profile/.databrickscfg new file mode 100644 index 000000000..628505286 --- /dev/null +++ b/acceptance/auth/bundle_and_profile/.databrickscfg @@ -0,0 +1,5 @@ +[DEFAULT] +host = $DATABRICKS_HOST + +[profile_name] +host = https://test@non-existing-subdomain.databricks.com diff --git a/acceptance/auth/bundle_and_profile/databricks.yml b/acceptance/auth/bundle_and_profile/databricks.yml new file mode 100644 index 000000000..975661395 --- /dev/null +++ b/acceptance/auth/bundle_and_profile/databricks.yml @@ -0,0 +1,14 @@ +bundle: + name: test-auth + +workspace: + host: $DATABRICKS_HOST + +targets: + dev: + default: true + workspace: + host: $DATABRICKS_HOST + prod: + workspace: + host: https://bar.com diff --git a/acceptance/auth/bundle_and_profile/output.txt b/acceptance/auth/bundle_and_profile/output.txt new file mode 100644 index 000000000..f32d5ba22 --- /dev/null +++ b/acceptance/auth/bundle_and_profile/output.txt @@ -0,0 +1,91 @@ + +=== Inside the bundle, no flags +>>> errcode [CLI] current-user me +"[USERNAME]" + +=== Inside the bundle, target flags +>>> errcode [CLI] current-user me -t dev +"[USERNAME]" + +=== Inside the bundle, target and matching profile +>>> errcode [CLI] current-user me -t dev -p DEFAULT +"[USERNAME]" + +=== Inside the bundle, profile flag not matching bundle host. 
Should use profile from the flag and not the bundle. +>>> errcode [CLI] current-user me -p profile_name +Error: Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted) + +Exit code: 1 + +=== Inside the bundle, target and not matching profile +>>> errcode [CLI] current-user me -t dev -p profile_name +Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host https://non-existing-subdomain.databricks.com, but CLI configured to use [DATABRICKS_TARGET] + +Exit code: 1 + +=== Bundle commands load bundle configuration when no flags, validation OK +>>> errcode [CLI] bundle validate +Name: test-auth +Target: dev +Workspace: + Host: [DATABRICKS_TARGET] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev + +Validation OK! + +=== Bundle commands load bundle configuration with -t flag, validation OK +>>> errcode [CLI] bundle validate -t dev +Name: test-auth +Target: dev +Workspace: + Host: [DATABRICKS_TARGET] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev + +Validation OK! + +=== Bundle commands load bundle configuration with -p flag, validation not OK (profile host don't match bundle host) +>>> errcode [CLI] bundle validate -p profile_name +Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host https://non-existing-subdomain.databricks.com, but CLI configured to use [DATABRICKS_TARGET] + +Name: test-auth +Target: dev +Workspace: + Host: [DATABRICKS_TARGET] + +Found 1 error + +Exit code: 1 + +=== Bundle commands load bundle configuration with -t and -p flag, validation OK (profile host match bundle host) +>>> errcode [CLI] bundle validate -t dev -p DEFAULT +Name: test-auth +Target: dev +Workspace: + Host: [DATABRICKS_TARGET] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev + +Validation OK! + +=== Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host) +>>> errcode [CLI] bundle validate -t prod -p DEFAULT +Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host [DATABRICKS_TARGET], but CLI configured to use https://bar.com + +Name: test-auth +Target: prod +Workspace: + Host: https://bar.com + +Found 1 error + +Exit code: 1 + +=== Outside the bundle, no flags +>>> errcode [CLI] current-user me +"[USERNAME]" + +=== Outside the bundle, profile flag +>>> errcode [CLI] current-user me -p profile_name +"[USERNAME]" diff --git a/acceptance/auth/bundle_and_profile/script b/acceptance/auth/bundle_and_profile/script new file mode 100644 index 000000000..c078d5316 --- /dev/null +++ b/acceptance/auth/bundle_and_profile/script @@ -0,0 +1,45 @@ +# Replace placeholder with an actual host URL +envsubst < databricks.yml > out.yml && mv out.yml databricks.yml +envsubst < .databrickscfg > out && mv out .databrickscfg +export DATABRICKS_CONFIG_FILE=.databrickscfg + +host=$DATABRICKS_HOST +unset DATABRICKS_HOST + +title "Inside the bundle, no flags" +trace errcode $CLI current-user me | jq .userName + +title "Inside the bundle, target flags" +trace errcode $CLI current-user me -t dev | jq .userName + +title "Inside the bundle, target and matching profile" +trace errcode $CLI current-user me -t dev -p DEFAULT | jq .userName + +title "Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle." 
+trace errcode $CLI current-user me -p profile_name | jq .userName + +title "Inside the bundle, target and not matching profile" +trace errcode $CLI current-user me -t dev -p profile_name + +title "Bundle commands load bundle configuration when no flags, validation OK" +trace errcode $CLI bundle validate + +title "Bundle commands load bundle configuration with -t flag, validation OK" +trace errcode $CLI bundle validate -t dev + +title "Bundle commands load bundle configuration with -p flag, validation not OK (profile host don't match bundle host)" +trace errcode $CLI bundle validate -p profile_name + +title "Bundle commands load bundle configuration with -t and -p flag, validation OK (profile host match bundle host)" +trace errcode $CLI bundle validate -t dev -p DEFAULT + +title "Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host)" +trace errcode $CLI bundle validate -t prod -p DEFAULT + +cd .. +export DATABRICKS_HOST=$host +title "Outside the bundle, no flags" +trace errcode $CLI current-user me | jq .userName + +title "Outside the bundle, profile flag" +trace errcode $CLI current-user me -p profile_name | jq .userName diff --git a/acceptance/auth/bundle_and_profile/test.toml b/acceptance/auth/bundle_and_profile/test.toml new file mode 100644 index 000000000..697281ee5 --- /dev/null +++ b/acceptance/auth/bundle_and_profile/test.toml @@ -0,0 +1,14 @@ +# Some of the clouds have DATABRICKS_HOST variable setup without https:// prefix +# In the result, output is replaced with DATABRICKS_URL variable instead of DATABRICKS_HOST +# This is a workaround to replace DATABRICKS_URL with DATABRICKS_HOST +[[Repls]] +Old='DATABRICKS_HOST' +New='DATABRICKS_TARGET' + +[[Repls]] +Old='DATABRICKS_URL' +New='DATABRICKS_TARGET' + +[[Repls]] +Old='Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": .*' +New='Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted)' diff --git a/acceptance/auth/credentials/basic/out.requests.txt b/acceptance/auth/credentials/basic/out.requests.txt new file mode 100644 index 000000000..b549c7423 --- /dev/null +++ b/acceptance/auth/credentials/basic/out.requests.txt @@ -0,0 +1,12 @@ +{ + "headers": { + "Authorization": [ + "Basic [ENCODED_AUTH]" + ], + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/current-user_me cmd-exec-id/[UUID] auth/basic" + ] + }, + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} diff --git a/acceptance/auth/credentials/basic/output.txt b/acceptance/auth/credentials/basic/output.txt new file mode 100644 index 000000000..c5747c9e4 --- /dev/null +++ b/acceptance/auth/credentials/basic/output.txt @@ -0,0 +1,4 @@ +{ + "id":"[USERID]", + "userName":"[USERNAME]" +} diff --git a/acceptance/auth/credentials/basic/script b/acceptance/auth/credentials/basic/script new file mode 100644 index 000000000..aae249083 --- /dev/null +++ b/acceptance/auth/credentials/basic/script @@ -0,0 +1,8 @@ +# Unset the token which is configured by default +# in acceptance tests +export DATABRICKS_TOKEN="" + +export DATABRICKS_USERNAME=username +export DATABRICKS_PASSWORD=password + +$CLI current-user me diff --git a/acceptance/auth/credentials/basic/test.toml b/acceptance/auth/credentials/basic/test.toml new file mode 100644 index 000000000..4998d81d7 --- /dev/null +++ b/acceptance/auth/credentials/basic/test.toml @@ -0,0 +1,4 @@ +# "username:password" in base64 is dXNlcm5hbWU6cGFzc3dvcmQ=, expect 
to see this in Authorization header +[[Repls]] +Old = "dXNlcm5hbWU6cGFzc3dvcmQ=" +New = "[ENCODED_AUTH]" diff --git a/acceptance/auth/credentials/oauth/out.requests.txt b/acceptance/auth/credentials/oauth/out.requests.txt new file mode 100644 index 000000000..525e148d8 --- /dev/null +++ b/acceptance/auth/credentials/oauth/out.requests.txt @@ -0,0 +1,34 @@ +{ + "headers": { + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS]" + ] + }, + "method": "GET", + "path": "/oidc/.well-known/oauth-authorization-server" +} +{ + "headers": { + "Authorization": [ + "Basic [ENCODED_AUTH]" + ], + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS]" + ] + }, + "method": "POST", + "path": "/oidc/v1/token", + "raw_body": "grant_type=client_credentials\u0026scope=all-apis" +} +{ + "headers": { + "Authorization": [ + "Bearer oauth-token" + ], + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/current-user_me cmd-exec-id/[UUID] auth/oauth-m2m" + ] + }, + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} diff --git a/acceptance/auth/credentials/oauth/output.txt b/acceptance/auth/credentials/oauth/output.txt new file mode 100644 index 000000000..c5747c9e4 --- /dev/null +++ b/acceptance/auth/credentials/oauth/output.txt @@ -0,0 +1,4 @@ +{ + "id":"[USERID]", + "userName":"[USERNAME]" +} diff --git a/acceptance/auth/credentials/oauth/script b/acceptance/auth/credentials/oauth/script new file mode 100644 index 000000000..e4519e41b --- /dev/null +++ b/acceptance/auth/credentials/oauth/script @@ -0,0 +1,8 @@ +# Unset the token which is configured by default +# in acceptance tests +export DATABRICKS_TOKEN="" + +export DATABRICKS_CLIENT_ID=client_id +export DATABRICKS_CLIENT_SECRET=client_secret + +$CLI current-user me diff --git a/acceptance/auth/credentials/oauth/test.toml b/acceptance/auth/credentials/oauth/test.toml new file mode 100644 index 000000000..2adade96a --- /dev/null +++ b/acceptance/auth/credentials/oauth/test.toml @@ -0,0 +1,5 @@ +# "client_id:client_secret" in base64 is Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=, expect to +# see this in Authorization header +[[Repls]] +Old = "Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=" +New = "[ENCODED_AUTH]" diff --git a/acceptance/auth/credentials/pat/out.requests.txt b/acceptance/auth/credentials/pat/out.requests.txt new file mode 100644 index 000000000..73c448c2f --- /dev/null +++ b/acceptance/auth/credentials/pat/out.requests.txt @@ -0,0 +1,12 @@ +{ + "headers": { + "Authorization": [ + "Bearer dapi1234" + ], + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/current-user_me cmd-exec-id/[UUID] auth/pat" + ] + }, + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} diff --git a/acceptance/auth/credentials/pat/output.txt b/acceptance/auth/credentials/pat/output.txt new file mode 100644 index 000000000..c5747c9e4 --- /dev/null +++ b/acceptance/auth/credentials/pat/output.txt @@ -0,0 +1,4 @@ +{ + "id":"[USERID]", + "userName":"[USERNAME]" +} diff --git a/acceptance/auth/credentials/pat/script b/acceptance/auth/credentials/pat/script new file mode 100644 index 000000000..ccf1098e7 --- /dev/null +++ b/acceptance/auth/credentials/pat/script @@ -0,0 +1,3 @@ +export DATABRICKS_TOKEN=dapi1234 + +$CLI current-user me diff --git a/acceptance/auth/credentials/test.toml b/acceptance/auth/credentials/test.toml new file mode 100644 index 000000000..dc775ea62 --- /dev/null +++ 
b/acceptance/auth/credentials/test.toml @@ -0,0 +1,18 @@ +RecordRequests = true +IncludeRequestHeaders = ["Authorization", "User-Agent"] + +[[Repls]] +Old = '(linux|darwin|windows)' +New = '[OS]' + +[[Repls]] +Old = " upstream/[A-Za-z0-9.-]+" +New = "" + +[[Repls]] +Old = " upstream-version/[A-Za-z0-9.-]+" +New = "" + +[[Repls]] +Old = " cicd/[A-Za-z0-9.-]+" +New = "" diff --git a/acceptance/bin/callserver.py b/acceptance/bin/callserver.py new file mode 100755 index 000000000..294ef8fdb --- /dev/null +++ b/acceptance/bin/callserver.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import sys +import os +import json +import urllib.request +from urllib.parse import urlencode + +env = {} +for key, value in os.environ.items(): + if len(value) > 10_000: + sys.stderr.write(f"Dropping key={key} value len={len(value)}\n") + continue + env[key] = value + +q = { + "args": " ".join(sys.argv[1:]), + "cwd": os.getcwd(), + "env": json.dumps(env), +} + +url = os.environ["CMD_SERVER_URL"] + "/?" + urlencode(q) +if len(url) > 100_000: + sys.exit("url too large") + +resp = urllib.request.urlopen(url) +assert resp.status == 200, (resp.status, resp.url, resp.headers) +result = json.load(resp) +sys.stderr.write(result["stderr"]) +sys.stdout.write(result["stdout"]) +exitcode = int(result["exitcode"]) +sys.exit(exitcode) diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py new file mode 100755 index 000000000..c1b59655a --- /dev/null +++ b/acceptance/bin/diff.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first""" + +import sys +import difflib +import json +import re +from pathlib import Path + + +def replaceAll(patterns, s): + for comp, new in patterns: + s = comp.sub(new, s) + return s + + +def main(): + d1, d2 = sys.argv[1:] + d1, d2 = Path(d1), Path(d2) + + with open("repls.json") as f: + repls = json.load(f) + + patterns = [] + for r in repls: + try: + c = re.compile(r["Old"]) + patterns.append((c, r["New"])) + except re.error as e: + print(f"Regex error for pattern {r}: {e}", file=sys.stderr) + + files1 = [str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()] + files2 = [str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()] + + set1 = set(files1) + set2 = set(files2) + + for f in sorted(set1 | set2): + p1 = d1 / f + p2 = d2 / f + if f not in set2: + print(f"Only in {d1}: {f}") + elif f not in set1: + print(f"Only in {d2}: {f}") + else: + a = replaceAll(patterns, p1.read_text()).splitlines(True) + b = replaceAll(patterns, p2.read_text()).splitlines(True) + if a != b: + p1_str = p1.as_posix() + p2_str = p2.as_posix() + for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2): + print(line, end="") + + +if __name__ == "__main__": + main() diff --git a/acceptance/bin/sort_blocks.py b/acceptance/bin/sort_blocks.py index f50c6f50f..d558f252a 100755 --- a/acceptance/bin/sort_blocks.py +++ b/acceptance/bin/sort_blocks.py @@ -4,6 +4,7 @@ Helper to sort blocks in text file. A block is a set of lines separated from oth This is to workaround non-determinism in the output. 
""" + import sys blocks = [] @@ -11,10 +12,10 @@ blocks = [] for line in sys.stdin: if not line.strip(): if blocks and blocks[-1]: - blocks.append('') + blocks.append("") continue if not blocks: - blocks.append('') + blocks.append("") blocks[-1] += line blocks.sort() diff --git a/acceptance/bin/sort_lines.py b/acceptance/bin/sort_lines.py new file mode 100755 index 000000000..9ac87feee --- /dev/null +++ b/acceptance/bin/sort_lines.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +""" +Helper to sort lines in text file. Similar to 'sort' but no dependence on locale or presence of 'sort' in PATH. +""" + +import sys + +lines = sys.stdin.readlines() +lines.sort() +sys.stdout.write("".join(lines)) diff --git a/acceptance/build/.gitignore b/acceptance/build/.gitignore deleted file mode 100644 index a48b4db25..000000000 --- a/acceptance/build/.gitignore +++ /dev/null @@ -1 +0,0 @@ -databricks diff --git a/acceptance/bundle/debug/databricks.yml b/acceptance/bundle/debug/databricks.yml new file mode 100644 index 000000000..2c9dd3c90 --- /dev/null +++ b/acceptance/bundle/debug/databricks.yml @@ -0,0 +1,2 @@ +bundle: + name: debug diff --git a/acceptance/bundle/debug/out.stderr.parallel.txt b/acceptance/bundle/debug/out.stderr.parallel.txt new file mode 100644 index 000000000..13c81c511 --- /dev/null +++ b/acceptance/bundle/debug/out.stderr.parallel.txt @@ -0,0 +1,15 @@ +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) mutator (read-only)=parallel +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) mutator (read-only)=parallel mutator (read-only)=validate:SingleNodeCluster +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) mutator (read-only)=parallel mutator (read-only)=validate:artifact_paths +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) mutator (read-only)=parallel mutator (read-only)=validate:job_cluster_key_defined +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=fast_validate(readonly) mutator (read-only)=parallel mutator (read-only)=validate:job_task_cluster_spec +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:files_to_sync +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:folder_permissions +10:07:59 Debug: ApplyReadOnly pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:validate_sync_patterns +10:07:59 Debug: Path /Workspace/Users/[USERNAME]/.bundle/debug/default/files has type directory (ID: 0) pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:files_to_sync +10:07:59 Debug: non-retriable error: Workspace path not found pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:files_to_sync sdk=true +< HTTP/0.0 000 OK pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:files_to_sync sdk=true +< } pid=12345 mutator=validate mutator 
(read-only)=parallel mutator (read-only)=validate:files_to_sync sdk=true +< } pid=12345 mutator=validate mutator (read-only)=parallel mutator (read-only)=validate:files_to_sync sdk=true diff --git a/acceptance/bundle/debug/out.stderr.txt b/acceptance/bundle/debug/out.stderr.txt new file mode 100644 index 000000000..e763cb6ff --- /dev/null +++ b/acceptance/bundle/debug/out.stderr.txt @@ -0,0 +1,88 @@ +10:07:59 Info: start pid=12345 version=[DEV_VERSION] args="[CLI], bundle, validate, --debug" +10:07:59 Debug: Found bundle root at [TMPDIR] (file [TMPDIR]/databricks.yml) pid=12345 +10:07:59 Info: Phase: load pid=12345 +10:07:59 Debug: Apply pid=12345 mutator=EntryPoint +10:07:59 Debug: Apply pid=12345 mutator=scripts.preinit +10:07:59 Debug: No script defined for preinit, skipping pid=12345 mutator=scripts.preinit +10:07:59 Debug: Apply pid=12345 mutator=ProcessRootIncludes +10:07:59 Debug: Apply pid=12345 mutator=VerifyCliVersion +10:07:59 Debug: Apply pid=12345 mutator=EnvironmentsToTargets +10:07:59 Debug: Apply pid=12345 mutator=ComputeIdToClusterId +10:07:59 Debug: Apply pid=12345 mutator=InitializeVariables +10:07:59 Debug: Apply pid=12345 mutator=DefineDefaultTarget(default) +10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load) +10:07:59 Debug: Apply pid=12345 mutator=validate:unique_resource_keys +10:07:59 Debug: Apply pid=12345 mutator=SelectDefaultTarget +10:07:59 Debug: Apply pid=12345 mutator=SelectDefaultTarget mutator=SelectTarget(default) +10:07:59 Debug: Apply pid=12345 mutator= +10:07:59 Info: Phase: initialize pid=12345 +10:07:59 Debug: Apply pid=12345 mutator=validate:AllResourcesHaveValues +10:07:59 Debug: Apply pid=12345 mutator=RewriteSyncPaths +10:07:59 Debug: Apply pid=12345 mutator=SyncDefaultPath +10:07:59 Debug: Apply pid=12345 mutator=SyncInferRoot +10:07:59 Debug: Apply pid=12345 mutator=PopulateCurrentUser +10:07:59 Debug: GET /api/2.0/preview/scim/v2/Me +< HTTP/1.1 200 OK +< { +< "id": "[USERID]", +< "userName": "[USERNAME]" +< } pid=12345 mutator=PopulateCurrentUser sdk=true +10:07:59 Debug: Apply pid=12345 mutator=LoadGitDetails +10:07:59 Debug: Apply pid=12345 mutator=ApplySourceLinkedDeploymentPreset +10:07:59 Debug: Apply pid=12345 mutator=DefineDefaultWorkspaceRoot +10:07:59 Debug: Apply pid=12345 mutator=ExpandWorkspaceRoot +10:07:59 Debug: Apply pid=12345 mutator=DefaultWorkspacePaths +10:07:59 Debug: Apply pid=12345 mutator=PrependWorkspacePrefix +10:07:59 Debug: Apply pid=12345 mutator=RewriteWorkspacePrefix +10:07:59 Debug: Apply pid=12345 mutator=SetVariables +10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(init) +10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(load_resources) +10:07:59 Debug: Apply pid=12345 mutator=PythonMutator(apply_mutators) +10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences +10:07:59 Debug: Apply pid=12345 mutator=ResolveResourceReferences +10:07:59 Debug: Apply pid=12345 mutator=ResolveVariableReferences +10:07:59 Debug: Apply pid=12345 mutator=MergeJobClusters +10:07:59 Debug: Apply pid=12345 mutator=MergeJobParameters +10:07:59 Debug: Apply pid=12345 mutator=MergeJobTasks +10:07:59 Debug: Apply pid=12345 mutator=MergePipelineClusters +10:07:59 Debug: Apply pid=12345 mutator=MergeApps +10:07:59 Debug: Apply pid=12345 mutator=CaptureSchemaDependency +10:07:59 Debug: Apply pid=12345 mutator=CheckPermissions +10:07:59 Debug: Apply pid=12345 mutator=SetRunAs +10:07:59 Debug: Apply pid=12345 mutator=OverrideCompute +10:07:59 Debug: Apply pid=12345 mutator=ConfigureDashboardDefaults +10:07:59 
Debug: Apply pid=12345 mutator=ConfigureVolumeDefaults +10:07:59 Debug: Apply pid=12345 mutator=ProcessTargetMode +10:07:59 Debug: Apply pid=12345 mutator=ApplyPresets +10:07:59 Debug: Apply pid=12345 mutator=DefaultQueueing +10:07:59 Debug: Apply pid=12345 mutator=ExpandPipelineGlobPaths +10:07:59 Debug: Apply pid=12345 mutator=ConfigureWSFS +10:07:59 Debug: Apply pid=12345 mutator=TranslatePaths +10:07:59 Debug: Apply pid=12345 mutator=PythonWrapperWarning +10:07:59 Debug: Apply pid=12345 mutator=apps.Validate +10:07:59 Debug: Apply pid=12345 mutator=ValidateSharedRootPermissions +10:07:59 Debug: Apply pid=12345 mutator=ApplyBundlePermissions +10:07:59 Debug: Apply pid=12345 mutator=FilterCurrentUserFromPermissions +10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotateJobs +10:07:59 Debug: Apply pid=12345 mutator=metadata.AnnotatePipelines +10:07:59 Debug: Apply pid=12345 mutator=terraform.Initialize +10:07:59 Debug: Using Terraform from DATABRICKS_TF_EXEC_PATH at [TERRAFORM] pid=12345 mutator=terraform.Initialize +10:07:59 Debug: Using Terraform CLI config from DATABRICKS_TF_CLI_CONFIG_FILE at [DATABRICKS_TF_CLI_CONFIG_FILE] pid=12345 mutator=terraform.Initialize +10:07:59 Debug: Environment variables for Terraform: ...redacted... pid=12345 mutator=terraform.Initialize +10:07:59 Debug: Apply pid=12345 mutator=scripts.postinit +10:07:59 Debug: No script defined for postinit, skipping pid=12345 mutator=scripts.postinit +10:07:59 Debug: Apply pid=12345 mutator=validate +10:07:59 Debug: GET /api/2.0/workspace/get-status?path=/Workspace/Users/[USERNAME]/.bundle/debug/default/files +< HTTP/1.1 404 Not Found +< { +< "message": "Workspace path not found" +10:07:59 Debug: POST /api/2.0/workspace/mkdirs +> { +> "path": "/Workspace/Users/[USERNAME]/.bundle/debug/default/files" +> } +10:07:59 Debug: GET /api/2.0/workspace/get-status?path=/Workspace/Users/[USERNAME]/.bundle/debug/default/files +< HTTP/1.1 200 OK +< { +< "object_type": "DIRECTORY", +< "path": "/Workspace/Users/[USERNAME]/.bundle/debug/default/files" +10:07:59 Info: completed execution pid=12345 exit_code=0 diff --git a/acceptance/bundle/debug/output.txt b/acceptance/bundle/debug/output.txt new file mode 100644 index 000000000..ed72b360e --- /dev/null +++ b/acceptance/bundle/debug/output.txt @@ -0,0 +1,7 @@ +Name: debug +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/debug/default + +Validation OK! diff --git a/acceptance/bundle/debug/script b/acceptance/bundle/debug/script new file mode 100644 index 000000000..005a1a341 --- /dev/null +++ b/acceptance/bundle/debug/script @@ -0,0 +1,4 @@ +$CLI bundle validate --debug 2> full.stderr.txt +grep -vw parallel full.stderr.txt > out.stderr.txt +grep -w parallel full.stderr.txt | sed 's/[0-9]/0/g' | sort_lines.py > out.stderr.parallel.txt +rm full.stderr.txt diff --git a/acceptance/bundle/debug/test.toml b/acceptance/bundle/debug/test.toml new file mode 100644 index 000000000..79d1b9ee6 --- /dev/null +++ b/acceptance/bundle/debug/test.toml @@ -0,0 +1,18 @@ +Cloud = false + +[[Repls]] +# The keys are unsorted and also vary per OS +Old = 'Environment variables for Terraform: ([A-Z_ ,]+) ' +New = 'Environment variables for Terraform: ...redacted... 
' + +[[Repls]] +Old = 'pid=[0-9]+' +New = 'pid=12345' + +[[Repls]] +Old = '\d\d:\d\d:\d\d' +New = '10:07:59' + +[[Repls]] +Old = '\\' +New = '/' diff --git a/acceptance/bundle/generate/git_job/databricks.yml b/acceptance/bundle/generate/git_job/databricks.yml new file mode 100644 index 000000000..adaa7aab3 --- /dev/null +++ b/acceptance/bundle/generate/git_job/databricks.yml @@ -0,0 +1,2 @@ +bundle: + name: git_job diff --git a/acceptance/bundle/generate/git_job/out.job.yml b/acceptance/bundle/generate/git_job/out.job.yml new file mode 100644 index 000000000..0eb2a3fb1 --- /dev/null +++ b/acceptance/bundle/generate/git_job/out.job.yml @@ -0,0 +1,17 @@ +resources: + jobs: + out: + name: gitjob + tasks: + - task_key: test_task + notebook_task: + notebook_path: some/test/notebook.py + - task_key: test_task_2 + notebook_task: + notebook_path: /Workspace/Users/foo@bar.com/some/test/notebook.py + source: WORKSPACE + git_source: + git_branch: main + git_commit: abcdef + git_provider: github + git_url: https://git.databricks.com diff --git a/acceptance/bundle/generate/git_job/output.txt b/acceptance/bundle/generate/git_job/output.txt new file mode 100644 index 000000000..680c92ff9 --- /dev/null +++ b/acceptance/bundle/generate/git_job/output.txt @@ -0,0 +1,2 @@ +Job is using Git source, skipping downloading files +Job configuration successfully saved to out.job.yml diff --git a/acceptance/bundle/generate/git_job/script b/acceptance/bundle/generate/git_job/script new file mode 100644 index 000000000..7598966b0 --- /dev/null +++ b/acceptance/bundle/generate/git_job/script @@ -0,0 +1 @@ +$CLI bundle generate job --existing-job-id 1234 --config-dir . --key out diff --git a/acceptance/bundle/generate/git_job/test.toml b/acceptance/bundle/generate/git_job/test.toml new file mode 100644 index 000000000..fce46071a --- /dev/null +++ b/acceptance/bundle/generate/git_job/test.toml @@ -0,0 +1,33 @@ +Cloud = false # This test needs to run against stubbed Databricks API + +[[Server]] +Pattern = "GET /api/2.1/jobs/get" +Response.Body = ''' +{ + "job_id": 11223344, + "settings": { + "name": "gitjob", + "git_source": { + "git_url": "https://git.databricks.com", + "git_provider": "github", + "git_branch": "main", + "git_commit": "abcdef" + }, + "tasks": [ + { + "task_key": "test_task", + "notebook_task": { + "notebook_path": "some/test/notebook.py" + } + }, + { + "task_key": "test_task_2", + "notebook_task": { + "source": "WORKSPACE", + "notebook_path": "/Workspace/Users/foo@bar.com/some/test/notebook.py" + } + } + ] + } +} +''' diff --git a/acceptance/bundle/git-permerror/databricks.yml b/acceptance/bundle/git-permerror/databricks.yml new file mode 100644 index 000000000..83e0acda8 --- /dev/null +++ b/acceptance/bundle/git-permerror/databricks.yml @@ -0,0 +1,2 @@ +bundle: + name: git-permerror diff --git a/acceptance/bundle/git-permerror/output.txt b/acceptance/bundle/git-permerror/output.txt new file mode 100644 index 000000000..730e8255b --- /dev/null +++ b/acceptance/bundle/git-permerror/output.txt @@ -0,0 +1,81 @@ +=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case. 
+ +>>> chmod 000 .git + +>>> [CLI] bundle validate +Warn: failed to read .git: unable to load repository specific gitconfig: open config: permission denied +Error: unable to load repository specific gitconfig: open config: permission denied + +Name: git-permerror +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/git-permerror/default + +Found 1 error + +Exit code: 1 + +>>> [CLI] bundle validate -o json +Warn: failed to read .git: unable to load repository specific gitconfig: open config: permission denied +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": "." +} + +>>> withdir subdir/a/b [CLI] bundle validate -o json +Warn: failed to read .git: unable to load repository specific gitconfig: open config: permission denied +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": "." +} + + +=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. bundle_root_path is not correct in subdir case. + +>>> chmod 000 .git/HEAD + +>>> [CLI] bundle validate -o json +Warn: failed to load current branch: open HEAD: permission denied +Warn: failed to load latest commit: open HEAD: permission denied +{ + "bundle_root_path": "." +} + +>>> withdir subdir/a/b [CLI] bundle validate -o json +Warn: failed to load current branch: open HEAD: permission denied +Warn: failed to load latest commit: open HEAD: permission denied +{ + "bundle_root_path": "." +} + + +=== No permissions to read .git/config. Badness: inferred is incorrectly set to true. bundle_root_path is not correct in subdir case. + +>>> chmod 000 .git/config + +>>> [CLI] bundle validate -o json +Warn: failed to read .git: unable to load repository specific gitconfig: open config: permission denied +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": "." +} + +>>> withdir subdir/a/b [CLI] bundle validate -o json +Warn: failed to read .git: unable to load repository specific gitconfig: open config: permission denied +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": "." +} diff --git a/acceptance/bundle/git-permerror/script b/acceptance/bundle/git-permerror/script new file mode 100644 index 000000000..3a9b4db24 --- /dev/null +++ b/acceptance/bundle/git-permerror/script @@ -0,0 +1,26 @@ +mkdir myrepo +cd myrepo +cp ../databricks.yml . +git-repo-init +mkdir -p subdir/a/b + +printf "=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case.\n" +trace chmod 000 .git +errcode trace $CLI bundle validate +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +printf "\n\n=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. bundle_root_path is not correct in subdir case.\n" +chmod 700 .git +trace chmod 000 .git/HEAD +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +printf "\n\n=== No permissions to read .git/config. Badness: inferred is incorrectly set to true.
bundle_root_path is not correct in subdir case.\n" +chmod 666 .git/HEAD +trace chmod 000 .git/config +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +cd .. +rm -fr myrepo diff --git a/acceptance/bundle/git-permerror/test.toml b/acceptance/bundle/git-permerror/test.toml new file mode 100644 index 000000000..15305cff1 --- /dev/null +++ b/acceptance/bundle/git-permerror/test.toml @@ -0,0 +1,5 @@ +Badness = "inferred flag is set to true incorrectly; bundle_root_path is not correct; Warn and Error talk about the same issue; Warn goes to stderr, Error goes to stdout (for backward compat); Warning about permissions repeated twice" + +[GOOS] +# This test relies on chmod which does not work on Windows +windows = false diff --git a/acceptance/bundle/help/bundle-deploy/output.txt b/acceptance/bundle/help/bundle-deploy/output.txt new file mode 100644 index 000000000..84351e375 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/output.txt @@ -0,0 +1,21 @@ + +>>> [CLI] bundle deploy --help +Deploy bundle + +Usage: + databricks bundle deploy [flags] + +Flags: + --auto-approve Skip interactive approvals that might be required for deployment. + -c, --cluster-id string Override cluster in the deployment with the given cluster ID. + --fail-on-active-runs Fail if there are running jobs or pipelines in the deployment. + --force Force-override Git branch validation. + --force-lock Force acquisition of deployment lock. + -h, --help help for deploy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-deploy/script b/acceptance/bundle/help/bundle-deploy/script new file mode 100644 index 000000000..6375cfea3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/script @@ -0,0 +1 @@ +trace $CLI bundle deploy --help diff --git a/acceptance/bundle/help/bundle-deployment/output.txt b/acceptance/bundle/help/bundle-deployment/output.txt new file mode 100644 index 000000000..4199703b3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/output.txt @@ -0,0 +1,22 @@ + +>>> [CLI] bundle deployment --help +Deployment related commands + +Usage: + databricks bundle deployment [command] + +Available Commands: + bind Bind bundle-defined resources to existing resources + unbind Unbind bundle-defined resources from its managed remote resource + +Flags: + -h, --help help for deployment + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle deployment [command] --help" for more information about a command.
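Note on the [ENCODED_AUTH] placeholder used by the credential tests earlier in this change: it is the standard HTTP Basic scheme, where the Authorization header carries base64("username:password"). That is why acceptance/auth/credentials/basic/test.toml rewrites dXNlcm5hbWU6cGFzc3dvcmQ= and the OAuth test rewrites Y2xpZW50X2lkOmNsaWVudF9zZWNyZXQ=. A minimal sketch of that encoding (illustrative only; the helper name below is not part of this change):

    import base64

    def basic_auth_header(username: str, password: str) -> str:
        # HTTP Basic auth: base64-encode "username:password" and prefix with "Basic ".
        token = base64.b64encode(f"{username}:{password}".encode()).decode()
        return f"Basic {token}"

    # Stub credentials set in acceptance/auth/credentials/basic/script:
    print(basic_auth_header("username", "password"))
    # -> Basic dXNlcm5hbWU6cGFzc3dvcmQ=  (rewritten to [ENCODED_AUTH] by the [[Repls]] entry)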
diff --git a/acceptance/bundle/help/bundle-deployment/script b/acceptance/bundle/help/bundle-deployment/script new file mode 100644 index 000000000..ef93f7dc2 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/script @@ -0,0 +1 @@ +trace $CLI bundle deployment --help diff --git a/acceptance/bundle/help/bundle-destroy/output.txt b/acceptance/bundle/help/bundle-destroy/output.txt new file mode 100644 index 000000000..5ed9c1b7b --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/output.txt @@ -0,0 +1,18 @@ + +>>> [CLI] bundle destroy --help +Destroy deployed bundle resources + +Usage: + databricks bundle destroy [flags] + +Flags: + --auto-approve Skip interactive approvals for deleting resources and files + --force-lock Force acquisition of deployment lock. + -h, --help help for destroy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-destroy/script b/acceptance/bundle/help/bundle-destroy/script new file mode 100644 index 000000000..955d7b7f9 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/script @@ -0,0 +1 @@ +trace $CLI bundle destroy --help diff --git a/acceptance/bundle/help/bundle-generate-dashboard/output.txt b/acceptance/bundle/help/bundle-generate-dashboard/output.txt new file mode 100644 index 000000000..683175940 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/output.txt @@ -0,0 +1,24 @@ + +>>> [CLI] bundle generate dashboard --help +Generate configuration for a dashboard + +Usage: + databricks bundle generate dashboard [flags] + +Flags: + -s, --dashboard-dir string directory to write the dashboard representation to (default "src") + --existing-id string ID of the dashboard to generate configuration for + --existing-path string workspace path of the dashboard to generate configuration for + -f, --force force overwrite existing files in the output directory + -h, --help help for dashboard + --resource string resource key of dashboard to watch for changes + -d, --resource-dir string directory to write the configuration to (default "resources") + --watch watch for changes to the dashboard and update the configuration + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-dashboard/script b/acceptance/bundle/help/bundle-generate-dashboard/script new file mode 100644 index 000000000..320156129 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/script @@ -0,0 +1 @@ +trace $CLI bundle generate dashboard --help diff --git a/acceptance/bundle/help/bundle-generate-job/output.txt b/acceptance/bundle/help/bundle-generate-job/output.txt new file mode 100644 index 000000000..6a4274223 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/output.txt @@ -0,0 +1,21 @@ + +>>> [CLI] bundle generate job --help +Generate bundle configuration for a job + +Usage: + databricks bundle generate job [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-job-id int Job ID of the job to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for job + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-job/script b/acceptance/bundle/help/bundle-generate-job/script new file mode 100644 index 000000000..109ed59aa --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/script @@ -0,0 +1 @@ +trace $CLI bundle generate job --help diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt new file mode 100644 index 000000000..05c5573b8 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -0,0 +1,21 @@ + +>>> [CLI] bundle generate pipeline --help +Generate bundle configuration for a pipeline + +Usage: + databricks bundle generate pipeline [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-pipeline-id string ID of the pipeline to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for pipeline + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-pipeline/script b/acceptance/bundle/help/bundle-generate-pipeline/script new file mode 100644 index 000000000..c6af62d0a --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/script @@ -0,0 +1 @@ +trace $CLI bundle generate pipeline --help diff --git a/acceptance/bundle/help/bundle-generate/output.txt b/acceptance/bundle/help/bundle-generate/output.txt new file mode 100644 index 000000000..725f19af0 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/output.txt @@ -0,0 +1,25 @@ + +>>> [CLI] bundle generate --help +Generate bundle configuration + +Usage: + databricks bundle generate [command] + +Available Commands: + app Generate bundle configuration for a Databricks app + dashboard Generate configuration for a dashboard + job Generate bundle configuration for a job + pipeline Generate bundle configuration for a pipeline + +Flags: + -h, --help help for generate + --key string resource key to use for the generated configuration + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle generate [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-generate/script b/acceptance/bundle/help/bundle-generate/script new file mode 100644 index 000000000..932588768 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/script @@ -0,0 +1 @@ +trace $CLI bundle generate --help diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt new file mode 100644 index 000000000..fbafedea2 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -0,0 +1,31 @@ + +>>> [CLI] bundle init --help +Initialize using a bundle template. + +TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: +- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) +- a local file system path with a template directory +- a Git repository URL, e.g. https://github.com/my/repository + +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates. + +Usage: + databricks bundle init [TEMPLATE_PATH] [flags] + +Flags: + --branch string Git branch to use for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. + -h, --help help for init + --output-dir string Directory to write the initialized template to. + --tag string Git tag to use for template initialization + --template-dir string Directory path within a Git repository containing the template. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-init/script b/acceptance/bundle/help/bundle-init/script new file mode 100644 index 000000000..9bcce7559 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/script @@ -0,0 +1 @@ +trace $CLI bundle init --help diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt new file mode 100644 index 000000000..b8f3f118b --- /dev/null +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -0,0 +1,17 @@ + +>>> [CLI] bundle open --help +Open a resource in the browser + +Usage: + databricks bundle open [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for open + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-open/script b/acceptance/bundle/help/bundle-open/script new file mode 100644 index 000000000..b4dfa2222 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/script @@ -0,0 +1 @@ +trace $CLI bundle open --help diff --git a/acceptance/bundle/help/bundle-run/output.txt b/acceptance/bundle/help/bundle-run/output.txt new file mode 100644 index 000000000..4b9efbf2a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/output.txt @@ -0,0 +1,57 @@ + +>>> [CLI] bundle run --help +Run the job or pipeline identified by KEY. + +The KEY is the unique identifier of the resource to run. In addition to +customizing the run using any of the available flags, you can also specify +keyword or positional arguments as shown in these examples: + + databricks bundle run my_job -- --key1 value1 --key2 value2 + +Or: + + databricks bundle run my_job -- value1 value2 value3 + +If the specified job uses job parameters or the job has a notebook task with +parameters, the first example applies and flag names are mapped to the +parameter names. + +If the specified job does not use job parameters and the job has a Python file +task or a Python wheel task, the second example applies. + +Usage: + databricks bundle run [flags] KEY + +Job Flags: + --params stringToString comma separated k=v pairs for job parameters (default []) + +Job Task Flags: + Note: please prefer use of job-level parameters (--param) over task-level parameters. + For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task + --dbt-commands strings A list of commands to execute for jobs with DBT tasks. + --jar-params strings A list of parameters for jobs with Spark JAR tasks. + --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) + --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) + --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) + --python-params strings A list of parameters for jobs with Python tasks. + --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. + --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) + +Pipeline Flags: + --full-refresh strings List of tables to reset and recompute. + --full-refresh-all Perform a full graph reset and recompute. 
+ --refresh strings List of tables to update. + --refresh-all Perform a full graph update. + --validate-only Perform an update to validate graph correctness. + +Flags: + -h, --help help for run + --no-wait Don't wait for the run to complete. + --restart Restart the run if it is already running. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-run/script b/acceptance/bundle/help/bundle-run/script new file mode 100644 index 000000000..edcf1786a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/script @@ -0,0 +1 @@ +trace $CLI bundle run --help diff --git a/acceptance/bundle/help/bundle-schema/output.txt b/acceptance/bundle/help/bundle-schema/output.txt new file mode 100644 index 000000000..8b8a6b8e9 --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle schema --help +Generate JSON Schema for bundle configuration + +Usage: + databricks bundle schema [flags] + +Flags: + -h, --help help for schema + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-schema/script b/acceptance/bundle/help/bundle-schema/script new file mode 100644 index 000000000..a084fec8e --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/script @@ -0,0 +1 @@ +trace $CLI bundle schema --help diff --git a/acceptance/bundle/help/bundle-summary/output.txt b/acceptance/bundle/help/bundle-summary/output.txt new file mode 100644 index 000000000..534bb8214 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/output.txt @@ -0,0 +1,17 @@ + +>>> [CLI] bundle summary --help +Summarize resources deployed by this bundle + +Usage: + databricks bundle summary [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for summary + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-summary/script b/acceptance/bundle/help/bundle-summary/script new file mode 100644 index 000000000..967279d86 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/script @@ -0,0 +1 @@ +trace $CLI bundle summary --help diff --git a/acceptance/bundle/help/bundle-sync/output.txt b/acceptance/bundle/help/bundle-sync/output.txt new file mode 100644 index 000000000..992138a20 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/output.txt @@ -0,0 +1,19 @@ + +>>> [CLI] bundle sync --help +Synchronize bundle tree to the workspace + +Usage: + databricks bundle sync [flags] + +Flags: + --full perform full synchronization (default is incremental) + -h, --help help for sync + --interval duration file system polling interval (for --watch) (default 1s) + --output type type of the output format + --watch watch local file system for changes + +Global Flags: + --debug enable debug logging + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-sync/script b/acceptance/bundle/help/bundle-sync/script new file mode 100644 index 000000000..fe1d6c7e3 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/script @@ -0,0 +1 @@ +trace $CLI bundle sync --help diff --git a/acceptance/bundle/help/bundle-validate/output.txt b/acceptance/bundle/help/bundle-validate/output.txt new file mode 100644 index 000000000..7fd1ae7ea --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle validate --help +Validate configuration + +Usage: + databricks bundle validate [flags] + +Flags: + -h, --help help for validate + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-validate/script b/acceptance/bundle/help/bundle-validate/script new file mode 100644 index 000000000..8b8434b2d --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/script @@ -0,0 +1 @@ +trace $CLI bundle validate --help diff --git a/acceptance/bundle/help/bundle/output.txt b/acceptance/bundle/help/bundle/output.txt new file mode 100644 index 000000000..fc6dd623d --- /dev/null +++ b/acceptance/bundle/help/bundle/output.txt @@ -0,0 +1,33 @@ + +>>> [CLI] bundle --help +Databricks Asset Bundles let you express data/AI/analytics projects as code. + +Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html + +Usage: + databricks bundle [command] + +Available Commands: + deploy Deploy bundle + deployment Deployment related commands + destroy Destroy deployed bundle resources + generate Generate bundle configuration + init Initialize using a bundle template + open Open a resource in the browser + run Run a job or pipeline update + schema Generate JSON Schema for bundle configuration + summary Summarize resources deployed by this bundle + sync Synchronize bundle tree to the workspace + validate Validate configuration + +Flags: + -h, --help help for bundle + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks bundle [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle/script b/acceptance/bundle/help/bundle/script new file mode 100644 index 000000000..eac116817 --- /dev/null +++ b/acceptance/bundle/help/bundle/script @@ -0,0 +1 @@ +trace $CLI bundle --help diff --git a/acceptance/bundle/help/test.toml b/acceptance/bundle/help/test.toml new file mode 100644 index 000000000..18b1a8841 --- /dev/null +++ b/acceptance/bundle/help/test.toml @@ -0,0 +1 @@ +Cloud = false diff --git a/acceptance/bundle/includes/non_yaml_in_include/databricks.yml b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml new file mode 100644 index 000000000..162bd6013 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: non_yaml_in_includes + +include: + - test.py + - resources/*.yml diff --git a/acceptance/bundle/includes/non_yaml_in_include/output.txt b/acceptance/bundle/includes/non_yaml_in_include/output.txt new file mode 100644 index 000000000..f5211cc4b --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/output.txt @@ -0,0 +1,10 @@ +Error: Files in the 'include' configuration section must be YAML or JSON files. + in databricks.yml:5:4 + +The file test.py in the 'include' configuration section is not a YAML or JSON file, and only such files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead. + +Name: non_yaml_in_includes + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/includes/non_yaml_in_include/script b/acceptance/bundle/includes/non_yaml_in_include/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/includes/non_yaml_in_include/test.py b/acceptance/bundle/includes/non_yaml_in_include/test.py new file mode 100644 index 000000000..44159b395 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/test.py @@ -0,0 +1 @@ +print("Hello world") diff --git a/acceptance/bundle/libraries/maven/.gitignore b/acceptance/bundle/libraries/maven/.gitignore new file mode 100644 index 000000000..15bcc6dd0 --- /dev/null +++ b/acceptance/bundle/libraries/maven/.gitignore @@ -0,0 +1 @@ +.databricks diff --git a/acceptance/bundle/libraries/maven/databricks.yml b/acceptance/bundle/libraries/maven/databricks.yml new file mode 100644 index 000000000..785142626 --- /dev/null +++ b/acceptance/bundle/libraries/maven/databricks.yml @@ -0,0 +1,27 @@ +bundle: + name: maven + + +resources: + jobs: + testjob: + name: test-job + tasks: + - task_key: dbt + spark_jar_task: + main_class_name: com.databricks.example.Main + + libraries: + - maven: + coordinates: org.jsoup:jsoup:1.7.2 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/libraries/maven/out.job.libraries.txt b/acceptance/bundle/libraries/maven/out.job.libraries.txt new file mode 100644 index 000000000..2b4a0d5f5 --- /dev/null +++ 
b/acceptance/bundle/libraries/maven/out.job.libraries.txt @@ -0,0 +1,7 @@ +[ + { + "maven": { + "coordinates": "org.jsoup:jsoup:1.7.2" + } + } +] diff --git a/acceptance/bundle/libraries/maven/output.txt b/acceptance/bundle/libraries/maven/output.txt new file mode 100644 index 000000000..fd72d8d14 --- /dev/null +++ b/acceptance/bundle/libraries/maven/output.txt @@ -0,0 +1,15 @@ + +>>> [CLI] bundle validate -o json +[ + { + "maven": { + "coordinates": "org.jsoup:jsoup:1.7.2" + } + } +] + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/maven/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/libraries/maven/script b/acceptance/bundle/libraries/maven/script new file mode 100644 index 000000000..06d1b6409 --- /dev/null +++ b/acceptance/bundle/libraries/maven/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -o json | jq '.resources.jobs.testjob.tasks[0].libraries' +trace $CLI bundle deploy +cat out.requests.txt | jq 'select(.path == "/api/2.1/jobs/create")' | jq '.body.tasks[0].libraries' > out.job.libraries.txt +rm out.requests.txt diff --git a/acceptance/bundle/libraries/maven/test.toml b/acceptance/bundle/libraries/maven/test.toml new file mode 100644 index 000000000..62ba36982 --- /dev/null +++ b/acceptance/bundle/libraries/maven/test.toml @@ -0,0 +1,5 @@ +# We run this test only locally for now because we need to figure out how to do +# bundle destroy on script.cleanup first. +Cloud = false + +RecordRequests = true diff --git a/acceptance/bundle/libraries/pypi/.gitignore b/acceptance/bundle/libraries/pypi/.gitignore new file mode 100644 index 000000000..15bcc6dd0 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/.gitignore @@ -0,0 +1 @@ +.databricks diff --git a/acceptance/bundle/libraries/pypi/databricks.yml b/acceptance/bundle/libraries/pypi/databricks.yml new file mode 100644 index 000000000..67f3da254 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/databricks.yml @@ -0,0 +1,32 @@ +bundle: + name: pypi + + +resources: + jobs: + testjob: + name: test-job + tasks: + - task_key: dbt + dbt_task: + project_directory: ./ + profiles_directory: dbt_profiles/ + commands: + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/libraries/pypi/out.job.libraries.txt b/acceptance/bundle/libraries/pypi/out.job.libraries.txt new file mode 100644 index 000000000..ddc7d84a5 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/out.job.libraries.txt @@ -0,0 +1,7 @@ +[ + { + "pypi": { + "package": "dbt-databricks>=1.8.0,<2.0.0" + } + } +] diff --git a/acceptance/bundle/libraries/pypi/output.txt b/acceptance/bundle/libraries/pypi/output.txt new file mode 100644 index 000000000..002677d64 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/output.txt @@ -0,0 +1,15 @@ + +>>> [CLI] bundle validate -o json +[ + { + "pypi": { + "package": "dbt-databricks>=1.8.0,<2.0.0" + } + } +] + +>>> [CLI] bundle deploy +Uploading bundle files to 
/Workspace/Users/[USERNAME]/.bundle/pypi/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/libraries/pypi/script b/acceptance/bundle/libraries/pypi/script new file mode 100644 index 000000000..06d1b6409 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -o json | jq '.resources.jobs.testjob.tasks[0].libraries' +trace $CLI bundle deploy +cat out.requests.txt | jq 'select(.path == "/api/2.1/jobs/create")' | jq '.body.tasks[0].libraries' > out.job.libraries.txt +rm out.requests.txt diff --git a/acceptance/bundle/libraries/pypi/test.toml b/acceptance/bundle/libraries/pypi/test.toml new file mode 100644 index 000000000..62ba36982 --- /dev/null +++ b/acceptance/bundle/libraries/pypi/test.toml @@ -0,0 +1,5 @@ +# We run this test only locally for now because we need to figure out how to do +# bundle destroy on script.cleanup first. +Cloud = false + +RecordRequests = true diff --git a/acceptance/bundle/override/clusters/output.txt b/acceptance/bundle/override/clusters/output.txt index cff30b3af..a30a7bbff 100644 --- a/acceptance/bundle/override/clusters/output.txt +++ b/acceptance/bundle/override/clusters/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle validate -o json -t default +>>> [CLI] bundle validate -o json -t default { "autoscale": { "max_workers": 7, @@ -15,7 +15,7 @@ "spark_version": "13.3.x-scala2.12" } ->>> $CLI bundle validate -o json -t development +>>> [CLI] bundle validate -o json -t development { "autoscale": { "max_workers": 3, diff --git a/acceptance/bundle/override/job_cluster/output.txt b/acceptance/bundle/override/job_cluster/output.txt index 947d19032..e4120e1c3 100644 --- a/acceptance/bundle/override/job_cluster/output.txt +++ b/acceptance/bundle/override/job_cluster/output.txt @@ -1,10 +1,10 @@ ->>> $CLI bundle validate -o json -t development +>>> [CLI] bundle validate -o json -t development { "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -27,12 +27,12 @@ } } ->>> $CLI bundle validate -o json -t staging +>>> [CLI] bundle validate -o json -t staging { "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/override/job_cluster_var/output.txt b/acceptance/bundle/override/job_cluster_var/output.txt index cb76de5a8..3545d6987 100644 --- a/acceptance/bundle/override/job_cluster_var/output.txt +++ b/acceptance/bundle/override/job_cluster_var/output.txt @@ -1,10 +1,10 @@ ->>> $CLI bundle validate -o json -t development +>>> [CLI] bundle validate -o json -t development { "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -27,21 +27,21 @@ } } ->>> $CLI bundle validate -t development +>>> 
[CLI] bundle validate -t development Name: override_job_cluster Target: development Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/override_job_cluster/development Validation OK! ->>> $CLI bundle validate -o json -t staging +>>> [CLI] bundle validate -o json -t staging { "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -64,11 +64,11 @@ Validation OK! } } ->>> $CLI bundle validate -t staging +>>> [CLI] bundle validate -t staging Name: override_job_cluster Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/override_job_cluster/staging Validation OK! diff --git a/acceptance/bundle/override/job_tasks/out.development.stderr.txt b/acceptance/bundle/override/job_tasks/out.development.stderr.txt index 7b6fef0cc..1873feb35 100644 --- a/acceptance/bundle/override/job_tasks/out.development.stderr.txt +++ b/acceptance/bundle/override/job_tasks/out.development.stderr.txt @@ -1,5 +1,5 @@ ->>> errcode $CLI bundle validate -o json -t development +>>> errcode [CLI] bundle validate -o json -t development Error: file ./test1.py not found diff --git a/acceptance/bundle/override/job_tasks/output.txt b/acceptance/bundle/override/job_tasks/output.txt index 0bb0b1812..1f7796217 100644 --- a/acceptance/bundle/override/job_tasks/output.txt +++ b/acceptance/bundle/override/job_tasks/output.txt @@ -28,7 +28,7 @@ ] } ->>> errcode $CLI bundle validate -o json -t staging +>>> errcode [CLI] bundle validate -o json -t staging Error: file ./test1.py not found @@ -63,14 +63,14 @@ Exit code: 1 ] } ->>> errcode $CLI bundle validate -t staging +>>> errcode [CLI] bundle validate -t staging Error: file ./test1.py not found Name: override_job_tasks Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_tasks/staging + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/override_job_tasks/staging Found 1 error diff --git a/acceptance/bundle/override/merge-string-map/output.txt b/acceptance/bundle/override/merge-string-map/output.txt index 986da8174..6e2aef87b 100644 --- a/acceptance/bundle/override/merge-string-map/output.txt +++ b/acceptance/bundle/override/merge-string-map/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle validate -o json -t dev +>>> [CLI] bundle validate -o json -t dev Warning: expected map, found string at resources.clusters.my_cluster in databricks.yml:6:17 @@ -13,7 +13,7 @@ Warning: expected map, found string } } ->>> $CLI bundle validate -t dev +>>> [CLI] bundle validate -t dev Warning: expected map, found string at resources.clusters.my_cluster in databricks.yml:6:17 @@ -21,7 +21,7 @@ Warning: expected map, found string Name: merge-string-map Target: dev Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/merge-string-map/dev + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/merge-string-map/dev Found 1 warning diff --git a/acceptance/bundle/override/pipeline_cluster/output.txt 
b/acceptance/bundle/override/pipeline_cluster/output.txt index 81bf58180..d1a67f6b9 100644 --- a/acceptance/bundle/override/pipeline_cluster/output.txt +++ b/acceptance/bundle/override/pipeline_cluster/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle validate -o json -t development +>>> [CLI] bundle validate -o json -t development { "foo": { "clusters": [ @@ -14,14 +14,14 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_pipeline_cluster/development/state/metadata.json" }, "name": "job", "permissions": [] } } ->>> $CLI bundle validate -o json -t staging +>>> [CLI] bundle validate -o json -t staging { "foo": { "clusters": [ @@ -36,7 +36,7 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/override_pipeline_cluster/staging/state/metadata.json" }, "name": "job", "permissions": [] diff --git a/bundle/tests/path_translation/fallback/README.md b/acceptance/bundle/paths/fallback/README.md similarity index 100% rename from bundle/tests/path_translation/fallback/README.md rename to acceptance/bundle/paths/fallback/README.md diff --git a/bundle/tests/path_translation/nominal/databricks.yml b/acceptance/bundle/paths/fallback/databricks.yml similarity index 80% rename from bundle/tests/path_translation/nominal/databricks.yml rename to acceptance/bundle/paths/fallback/databricks.yml index cd425920d..c6d0abe0a 100644 --- a/bundle/tests/path_translation/nominal/databricks.yml +++ b/acceptance/bundle/paths/fallback/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_nominal + name: fallback include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/fallback/output.job.json b/acceptance/bundle/paths/fallback/output.job.json new file mode 100644 index 000000000..ac79e0cf6 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.job.json @@ -0,0 +1,67 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + } +] diff --git a/acceptance/bundle/paths/fallback/output.pipeline.json 
b/acceptance/bundle/paths/fallback/output.pipeline.json new file mode 100644 index 000000000..7ed4f74e6 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/fallback/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt new file mode 100644 index 000000000..85f185851 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle validate -t development -o json + +>>> [CLI] bundle validate -t error +Error: notebook this value is overridden not found. Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: fallback +Target: error +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/fallback/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/fallback/override_job.yml b/acceptance/bundle/paths/fallback/override_job.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_job.yml rename to acceptance/bundle/paths/fallback/override_job.yml diff --git a/bundle/tests/path_translation/fallback/override_pipeline.yml b/acceptance/bundle/paths/fallback/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_pipeline.yml rename to acceptance/bundle/paths/fallback/override_pipeline.yml diff --git a/bundle/tests/path_translation/fallback/resources/my_job.yml b/acceptance/bundle/paths/fallback/resources/my_job.yml similarity index 71% rename from bundle/tests/path_translation/fallback/resources/my_job.yml rename to acceptance/bundle/paths/fallback/resources/my_job.yml index 4907df4f0..921ee412b 100644 --- a/bundle/tests/path_translation/fallback/resources/my_job.yml +++ b/acceptance/bundle/paths/fallback/resources/my_job.yml @@ -4,33 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/fallback/resources/my_pipeline.yml b/acceptance/bundle/paths/fallback/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/resources/my_pipeline.yml rename to 
acceptance/bundle/paths/fallback/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/fallback/script b/acceptance/bundle/paths/fallback/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/fallback/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/fallback/script.cleanup b/acceptance/bundle/paths/fallback/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/fallback/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep b/acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/fallback/src/file.py b/acceptance/bundle/paths/fallback/src/file.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file.py rename to acceptance/bundle/paths/fallback/src/file.py diff --git a/bundle/tests/path_translation/fallback/src/file1.py b/acceptance/bundle/paths/fallback/src/file1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file1.py rename to acceptance/bundle/paths/fallback/src/file1.py diff --git a/bundle/tests/path_translation/fallback/src/file2.py b/acceptance/bundle/paths/fallback/src/file2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file2.py rename to acceptance/bundle/paths/fallback/src/file2.py diff --git a/bundle/tests/path_translation/fallback/src/notebook.py b/acceptance/bundle/paths/fallback/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook.py rename to acceptance/bundle/paths/fallback/src/notebook.py diff --git a/bundle/tests/path_translation/fallback/src/notebook1.py b/acceptance/bundle/paths/fallback/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook1.py rename to acceptance/bundle/paths/fallback/src/notebook1.py diff --git a/bundle/tests/path_translation/fallback/src/notebook2.py b/acceptance/bundle/paths/fallback/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook2.py rename to acceptance/bundle/paths/fallback/src/notebook2.py diff --git a/bundle/tests/path_translation/fallback/src/sql.sql b/acceptance/bundle/paths/fallback/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/fallback/src/sql.sql rename to acceptance/bundle/paths/fallback/src/sql.sql diff --git a/bundle/tests/path_translation/nominal/README.md b/acceptance/bundle/paths/nominal/README.md similarity index 100% rename from bundle/tests/path_translation/nominal/README.md rename to acceptance/bundle/paths/nominal/README.md diff --git a/bundle/tests/path_translation/fallback/databricks.yml b/acceptance/bundle/paths/nominal/databricks.yml similarity index 79% rename from bundle/tests/path_translation/fallback/databricks.yml rename to acceptance/bundle/paths/nominal/databricks.yml index 
92be3f921..5d3c22f91 100644 --- a/bundle/tests/path_translation/fallback/databricks.yml +++ b/acceptance/bundle/paths/nominal/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_fallback + name: nominal include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/nominal/output.job.json b/acceptance/bundle/paths/nominal/output.job.json new file mode 100644 index 000000000..26d19d77c --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.job.json @@ -0,0 +1,89 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + }, + { + "for_each_task": { + "task": { + "notebook_task": { + "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/notebook" + } + } + }, + "job_cluster_key": "default", + "task_key": "for_each_notebook_example" + }, + { + "for_each_task": { + "task": { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/file.py" + } + } + }, + "task_key": "for_each_spark_python_example" + } +] diff --git a/acceptance/bundle/paths/nominal/output.pipeline.json b/acceptance/bundle/paths/nominal/output.pipeline.json new file mode 100644 index 000000000..c6f2e0868 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/[USERNAME]/.bundle/nominal/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt new file mode 100644 index 000000000..40670f4cb --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.txt @@ -0,0 +1,16 @@ + +>>> [CLI] bundle validate -t development -o json + +>>> [CLI] bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: nominal +Target: error +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/nominal/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/nominal/override_job.yml b/acceptance/bundle/paths/nominal/override_job.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_job.yml rename to acceptance/bundle/paths/nominal/override_job.yml diff --git a/bundle/tests/path_translation/nominal/override_pipeline.yml b/acceptance/bundle/paths/nominal/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_pipeline.yml rename to acceptance/bundle/paths/nominal/override_pipeline.yml diff --git a/bundle/tests/path_translation/nominal/resources/my_job.yml b/acceptance/bundle/paths/nominal/resources/my_job.yml similarity index 74% rename from bundle/tests/path_translation/nominal/resources/my_job.yml rename to acceptance/bundle/paths/nominal/resources/my_job.yml index 2020c9dc8..13996a20c 100644 --- a/bundle/tests/path_translation/nominal/resources/my_job.yml +++ b/acceptance/bundle/paths/nominal/resources/my_job.yml @@ -4,38 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar - task_key: for_each_notebook_example + job_cluster_key: default for_each_task: task: notebook_task: @@ -44,5 +51,12 @@ resources: - task_key: for_each_spark_python_example for_each_task: task: + job_cluster_key: default spark_python_task: python_file: "this value is overridden" + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/nominal/resources/my_pipeline.yml b/acceptance/bundle/paths/nominal/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/resources/my_pipeline.yml rename to acceptance/bundle/paths/nominal/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/nominal/script b/acceptance/bundle/paths/nominal/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/nominal/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/nominal/script.cleanup b/acceptance/bundle/paths/nominal/script.cleanup new file mode 100644 index 
000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/nominal/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep b/acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/nominal/src/file.py b/acceptance/bundle/paths/nominal/src/file.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file.py rename to acceptance/bundle/paths/nominal/src/file.py diff --git a/bundle/tests/path_translation/nominal/src/file1.py b/acceptance/bundle/paths/nominal/src/file1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file1.py rename to acceptance/bundle/paths/nominal/src/file1.py diff --git a/bundle/tests/path_translation/nominal/src/file2.py b/acceptance/bundle/paths/nominal/src/file2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file2.py rename to acceptance/bundle/paths/nominal/src/file2.py diff --git a/bundle/tests/path_translation/nominal/src/notebook.py b/acceptance/bundle/paths/nominal/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook.py rename to acceptance/bundle/paths/nominal/src/notebook.py diff --git a/bundle/tests/path_translation/nominal/src/notebook1.py b/acceptance/bundle/paths/nominal/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook1.py rename to acceptance/bundle/paths/nominal/src/notebook1.py diff --git a/bundle/tests/path_translation/nominal/src/notebook2.py b/acceptance/bundle/paths/nominal/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook2.py rename to acceptance/bundle/paths/nominal/src/notebook2.py diff --git a/bundle/tests/path_translation/nominal/src/sql.sql b/acceptance/bundle/paths/nominal/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/nominal/src/sql.sql rename to acceptance/bundle/paths/nominal/src/sql.sql diff --git a/bundle/tests/relative_path_translation/databricks.yml b/acceptance/bundle/paths/relative_path_translation/databricks.yml similarity index 100% rename from bundle/tests/relative_path_translation/databricks.yml rename to acceptance/bundle/paths/relative_path_translation/databricks.yml diff --git a/acceptance/bundle/paths/relative_path_translation/output.default.json b/acceptance/bundle/paths/relative_path_translation/output.default.json new file mode 100644 index 000000000..e2514b392 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.default.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file1.py", + "/Workspace/remote/src/file1.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.override.json b/acceptance/bundle/paths/relative_path_translation/output.override.json new file mode 100644 index 000000000..729d2eaa0 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.override.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file2.py", + "/Workspace/remote/src/file2.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.txt b/acceptance/bundle/paths/relative_path_translation/output.txt new file mode 100644 index 000000000..b13d612b6 --- /dev/null +++ 
b/acceptance/bundle/paths/relative_path_translation/output.txt @@ -0,0 +1,4 @@ + +>>> [CLI] bundle validate -t default -o json + +>>> [CLI] bundle validate -t override -o json diff --git a/bundle/tests/relative_path_translation/resources/job.yml b/acceptance/bundle/paths/relative_path_translation/resources/job.yml similarity index 66% rename from bundle/tests/relative_path_translation/resources/job.yml rename to acceptance/bundle/paths/relative_path_translation/resources/job.yml index 93f121f25..9540ff1ad 100644 --- a/bundle/tests/relative_path_translation/resources/job.yml +++ b/acceptance/bundle/paths/relative_path_translation/resources/job.yml @@ -3,12 +3,20 @@ resources: job: tasks: - task_key: local + job_cluster_key: default spark_python_task: python_file: ../src/file1.py - task_key: variable_reference + job_cluster_key: default spark_python_task: # Note: this is a pure variable reference yet needs to persist the location # of the reference, not the location of the variable value. # Also see https://github.com/databricks/cli/issues/1330. python_file: ${var.file_path} + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/acceptance/bundle/paths/relative_path_translation/script b/acceptance/bundle/paths/relative_path_translation/script new file mode 100644 index 000000000..252e9a07f --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -t default -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json +trace $CLI bundle validate -t override -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json diff --git a/bundle/tests/relative_path_translation/src/file1.py b/acceptance/bundle/paths/relative_path_translation/src/file1.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file1.py rename to acceptance/bundle/paths/relative_path_translation/src/file1.py diff --git a/bundle/tests/relative_path_translation/src/file2.py b/acceptance/bundle/paths/relative_path_translation/src/file2.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file2.py rename to acceptance/bundle/paths/relative_path_translation/src/file2.py diff --git a/bundle/tests/quality_monitor/databricks.yml b/acceptance/bundle/quality_monitor/databricks.yml similarity index 100% rename from bundle/tests/quality_monitor/databricks.yml rename to acceptance/bundle/quality_monitor/databricks.yml diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt new file mode 100644 index 000000000..8a7f64ef2 --- /dev/null +++ b/acceptance/bundle/quality_monitor/output.txt @@ -0,0 +1,73 @@ + +>>> [CLI] bundle validate -o json -t development +{ + "mode": "development", + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.dev", + "schedule": null, + "table_name": "main.test.dev" + } + } +} + +>>> [CLI] bundle validate -o json -t staging +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + 
"inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.staging", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.staging" + } + } +} + +>>> [CLI] bundle validate -o json -t production +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day", + "1 hour" + ], + "model_id_col": "model_id_prod", + "prediction_col": "prediction_prod", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp_prod" + }, + "output_schema_name": "main.prod", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.prod" + } + } +} diff --git a/acceptance/bundle/quality_monitor/script b/acceptance/bundle/quality_monitor/script new file mode 100644 index 000000000..85a69d5e7 --- /dev/null +++ b/acceptance/bundle/quality_monitor/script @@ -0,0 +1,3 @@ +trace $CLI bundle validate -o json -t development | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t staging | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t production | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' diff --git a/acceptance/bundle/scripts/databricks.yml b/acceptance/bundle/scripts/databricks.yml new file mode 100644 index 000000000..6421e2b59 --- /dev/null +++ b/acceptance/bundle/scripts/databricks.yml @@ -0,0 +1,11 @@ +bundle: + name: scripts + +experimental: + scripts: + preinit: "python3 ./myscript.py $EXITCODE preinit" + postinit: "python3 ./myscript.py 0 postinit" + prebuild: "python3 ./myscript.py 0 prebuild" + postbuild: "python3 ./myscript.py 0 postbuild" + predeploy: "python3 ./myscript.py 0 predeploy" + postdeploy: "python3 ./myscript.py 0 postdeploy" diff --git a/acceptance/bundle/scripts/myscript.py b/acceptance/bundle/scripts/myscript.py new file mode 100644 index 000000000..d10f497e1 --- /dev/null +++ b/acceptance/bundle/scripts/myscript.py @@ -0,0 +1,8 @@ +import sys + +info = " ".join(sys.argv[1:]) +sys.stderr.write(f"from myscript.py {info}: hello stderr!\n") +sys.stdout.write(f"from myscript.py {info}: hello stdout!\n") + +exitcode = int(sys.argv[1]) +sys.exit(exitcode) diff --git a/acceptance/bundle/scripts/output.txt b/acceptance/bundle/scripts/output.txt new file mode 100644 index 000000000..68afb2fec --- /dev/null +++ b/acceptance/bundle/scripts/output.txt @@ -0,0 +1,50 @@ + +>>> EXITCODE=0 errcode [CLI] bundle validate +Executing 'preinit' script +from myscript.py 0 preinit: hello stdout! +from myscript.py 0 preinit: hello stderr! +Executing 'postinit' script +from myscript.py 0 postinit: hello stdout! +from myscript.py 0 postinit: hello stderr! +Name: scripts +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/scripts/default + +Validation OK! + +>>> EXITCODE=1 errcode [CLI] bundle validate +Executing 'preinit' script +from myscript.py 1 preinit: hello stdout! +from myscript.py 1 preinit: hello stderr! 
+Error: failed to execute script: exit status 1 + +Name: scripts + +Found 1 error + +Exit code: 1 + +>>> EXITCODE=0 errcode [CLI] bundle deploy +Executing 'preinit' script +from myscript.py 0 preinit: hello stdout! +from myscript.py 0 preinit: hello stderr! +Executing 'postinit' script +from myscript.py 0 postinit: hello stdout! +from myscript.py 0 postinit: hello stderr! +Executing 'prebuild' script +from myscript.py 0 prebuild: hello stdout! +from myscript.py 0 prebuild: hello stderr! +Executing 'postbuild' script +from myscript.py 0 postbuild: hello stdout! +from myscript.py 0 postbuild: hello stderr! +Executing 'predeploy' script +from myscript.py 0 predeploy: hello stdout! +from myscript.py 0 predeploy: hello stderr! +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/scripts/default/files... +Deploying resources... +Deployment complete! +Executing 'postdeploy' script +from myscript.py 0 postdeploy: hello stdout! +from myscript.py 0 postdeploy: hello stderr! diff --git a/acceptance/bundle/scripts/script b/acceptance/bundle/scripts/script new file mode 100644 index 000000000..de07d277e --- /dev/null +++ b/acceptance/bundle/scripts/script @@ -0,0 +1,3 @@ +trace EXITCODE=0 errcode $CLI bundle validate +trace EXITCODE=1 errcode $CLI bundle validate +trace EXITCODE=0 errcode $CLI bundle deploy diff --git a/acceptance/bundle/syncroot/dotdot-git/databricks.yml b/acceptance/bundle/syncroot/dotdot-git/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-git/output.txt b/acceptance/bundle/syncroot/dotdot-git/output.txt new file mode 100644 index 000000000..dbfc8451f --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/output.txt @@ -0,0 +1,11 @@ +Error: path "[TMPDIR]" is not within repository root "[TMPDIR]/myrepo" + +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/syncroot/dotdot-git/script b/acceptance/bundle/syncroot/dotdot-git/script new file mode 100644 index 000000000..278e77101 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/script @@ -0,0 +1,8 @@ +# This should error, we do not allow syncroot outside of git repo. +mkdir myrepo +cd myrepo +cp ../databricks.yml . +git-repo-init +errcode $CLI bundle validate +cd .. +rm -fr myrepo diff --git a/acceptance/bundle/syncroot/dotdot-git/test.toml b/acceptance/bundle/syncroot/dotdot-git/test.toml new file mode 100644 index 000000000..f57f83ee4 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/test.toml @@ -0,0 +1,3 @@ +[[Repls]] +Old = '\\\\myrepo' +New = '/myrepo' diff --git a/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-nogit/output.txt b/acceptance/bundle/syncroot/dotdot-nogit/output.txt new file mode 100644 index 000000000..4f189effd --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/output.txt @@ -0,0 +1,7 @@ +Name: test-bundle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle/default + +Validation OK! 
diff --git a/acceptance/bundle/syncroot/dotdot-nogit/script b/acceptance/bundle/syncroot/dotdot-nogit/script new file mode 100644 index 000000000..d3388903e --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/script @@ -0,0 +1,2 @@ +# This should not error, syncroot can be outside bundle root. +$CLI bundle validate diff --git a/acceptance/bundle/templates-machinery/helpers-error/databricks_template_schema.json b/acceptance/bundle/templates-machinery/helpers-error/databricks_template_schema.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers-error/databricks_template_schema.json @@ -0,0 +1 @@ +{} diff --git a/acceptance/bundle/templates-machinery/helpers-error/output.txt b/acceptance/bundle/templates-machinery/helpers-error/output.txt new file mode 100644 index 000000000..6020e944f --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers-error/output.txt @@ -0,0 +1,3 @@ +Error: failed to compute file content for helpers.txt.tmpl. template: :1:14: executing "" at : error calling user_name: + +Exit code: 1 diff --git a/acceptance/bundle/templates-machinery/helpers-error/script b/acceptance/bundle/templates-machinery/helpers-error/script new file mode 100644 index 000000000..d9fcbf62c --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers-error/script @@ -0,0 +1 @@ +$CLI bundle init . diff --git a/acceptance/bundle/templates-machinery/helpers-error/template/helpers.txt.tmpl b/acceptance/bundle/templates-machinery/helpers-error/template/helpers.txt.tmpl new file mode 100644 index 000000000..70e60edac --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers-error/template/helpers.txt.tmpl @@ -0,0 +1 @@ +user_name: {{ user_name }} diff --git a/acceptance/bundle/templates-machinery/helpers-error/test.toml b/acceptance/bundle/templates-machinery/helpers-error/test.toml new file mode 100644 index 000000000..3839635db --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers-error/test.toml @@ -0,0 +1,6 @@ +Badness = '''(minor) error message is not great: executing "" at : error calling user_name:''' + +[[Server]] +Pattern = "GET /api/2.0/preview/scim/v2/Me" +Response.Body = '{}' +Response.StatusCode = 500 diff --git a/acceptance/bundle/templates-machinery/helpers/databricks_template_schema.json b/acceptance/bundle/templates-machinery/helpers/databricks_template_schema.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers/databricks_template_schema.json @@ -0,0 +1 @@ +{} diff --git a/acceptance/bundle/templates-machinery/helpers/output.txt b/acceptance/bundle/templates-machinery/helpers/output.txt new file mode 100644 index 000000000..3126ea5af --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers/output.txt @@ -0,0 +1,2 @@ +✨ Successfully initialized template +user_name: [USERNAME] diff --git a/acceptance/bundle/templates-machinery/helpers/script b/acceptance/bundle/templates-machinery/helpers/script new file mode 100644 index 000000000..1773e7b03 --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers/script @@ -0,0 +1,3 @@ +$CLI bundle init . 
+cat helpers.txt +rm helpers.txt diff --git a/acceptance/bundle/templates-machinery/helpers/template/helpers.txt.tmpl b/acceptance/bundle/templates-machinery/helpers/template/helpers.txt.tmpl new file mode 100644 index 000000000..70e60edac --- /dev/null +++ b/acceptance/bundle/templates-machinery/helpers/template/helpers.txt.tmpl @@ -0,0 +1 @@ +user_name: {{ user_name }} diff --git a/acceptance/bundle/templates-machinery/test.toml b/acceptance/bundle/templates-machinery/test.toml new file mode 100644 index 000000000..18b1a8841 --- /dev/null +++ b/acceptance/bundle/templates-machinery/test.toml @@ -0,0 +1 @@ +Cloud = false diff --git a/acceptance/bundle/templates-machinery/wrong-path/output.txt b/acceptance/bundle/templates-machinery/wrong-path/output.txt new file mode 100644 index 000000000..0a6fdfc84 --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-path/output.txt @@ -0,0 +1,3 @@ +Error: not a bundle template: expected to find a template schema file at databricks_template_schema.json + +Exit code: 1 diff --git a/acceptance/bundle/templates-machinery/wrong-path/script b/acceptance/bundle/templates-machinery/wrong-path/script new file mode 100644 index 000000000..00c05927a --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-path/script @@ -0,0 +1,2 @@ +export NO_COLOR=1 +$CLI bundle init /DOES/NOT/EXIST diff --git a/acceptance/bundle/templates-machinery/wrong-path/test.toml b/acceptance/bundle/templates-machinery/wrong-path/test.toml new file mode 100644 index 000000000..4bbcb5100 --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-path/test.toml @@ -0,0 +1 @@ +Badness = 'The error message should include full path: "expected to find a template schema file at databricks_template_schema.json"' diff --git a/acceptance/bundle/templates-machinery/wrong-url/output.txt b/acceptance/bundle/templates-machinery/wrong-url/output.txt new file mode 100644 index 000000000..6b4f9c459 --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-url/output.txt @@ -0,0 +1,5 @@ +Error: git clone failed: git clone https://invalid-domain-123.databricks.com/hello/world [TMPDIR]_GPARENT/world-123456 --no-tags --depth=1: exit status 128. Cloning into '[TMPDIR]_GPARENT/world-123456'... 
+fatal: unable to access 'https://invalid-domain-123.databricks.com/hello/world/': Could not resolve host: invalid-domain-123.databricks.com + + +Exit code: 1 diff --git a/acceptance/bundle/templates-machinery/wrong-url/script b/acceptance/bundle/templates-machinery/wrong-url/script new file mode 100644 index 000000000..e9bc0f4f6 --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-url/script @@ -0,0 +1,2 @@ +export NO_COLOR=1 +$CLI bundle init https://invalid-domain-123.databricks.com/hello/world diff --git a/acceptance/bundle/templates-machinery/wrong-url/test.toml b/acceptance/bundle/templates-machinery/wrong-url/test.toml new file mode 100644 index 000000000..0bb24bf1a --- /dev/null +++ b/acceptance/bundle/templates-machinery/wrong-url/test.toml @@ -0,0 +1,7 @@ +[[Repls]] +Old = '\\' +New = '/' + +[[Repls]] +Old = '/world-[0-9]+' +New = '/world-123456' diff --git a/acceptance/bundle/templates/dbt-sql/input.json b/acceptance/bundle/templates/dbt-sql/input.json new file mode 100644 index 000000000..201ac9667 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_dbt_sql", + "http_path": "/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/dbt-sql/output.txt b/acceptance/bundle/templates/dbt-sql/output.txt new file mode 100644 index 000000000..2699ad554 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output.txt @@ -0,0 +1,32 @@ + +>>> [CLI] bundle init dbt-sql --config-file ./input.json --output-dir output + +Welcome to the dbt template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: [DATABRICKS_URL] + +📊 Your new project has been created in the 'my_dbt_sql' directory! +If you already have dbt installed, just type 'cd my_dbt_sql; dbt init' to get started. +Refer to the README.md file for full "getting started" guide and production setup instructions. + + +>>> [CLI] bundle validate -t dev +Name: my_dbt_sql +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_dbt_sql/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_dbt_sql +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_dbt_sql/prod + +Validation OK! 
diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json new file mode 100644 index 000000000..28fe943fd --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "redhat.vscode-yaml", + "innoverio.vscode-dbt-power-user", + ] +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json new file mode 100644 index 000000000..e8dcd1a83 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json @@ -0,0 +1,32 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md new file mode 100644 index 000000000..756a2eda4 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -0,0 +1,138 @@ +# my_dbt_sql + +The 'my_dbt_sql' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). + +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. 
For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. + + Run these instructions from the `my_dbt_sql` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To set up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found? You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. + +## Manually deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. + +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project. +For example, the default template would deploy a job called +`[dev yourname] my_dbt_sql_job` to your workspace. +You can find that job by opening your workspace and clicking on **Workflows**. + +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml.
+When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml +has a schedule set that runs every day. The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml new file mode 100644 index 000000000..4285a44eb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -0,0 +1,32 @@ +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_dbt_sql + uuid: [UUID] + +include: + - resources/*.yml + +# Deployment targets. +# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml new file mode 100644 index 000000000..fdaf30dda --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml @@ -0,0 +1,38 @@ + +# This file defines dbt profiles for deployed dbt jobs. +my_dbt_sql: + target: dev # default target + outputs: + + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B. 
when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: + type: databricks + method: http + catalog: main + schema: "{{ var('dev_schema') }}" + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'dev' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" + + # The production target when deployed with the Databricks CLI + prod: + type: databricks + method: http + catalog: main + schema: default + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'prod' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml new file mode 100644 index 000000000..4218640d8 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml @@ -0,0 +1,32 @@ +name: 'my_dbt_sql' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'my_dbt_sql' + +# These configurations specify where dbt should look for different types of files. +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. +model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{ config(...) }}` macro. +models: + my_dbt_sql: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/out.gitignore b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/out.gitignore new file mode 100644 index 000000000..231162918 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/out.gitignore @@ -0,0 +1,15 @@ +# DABs +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md + +# dbt +target/ +dbt_packages/ +dbt_modules/ +logs/ diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml new file mode 100644 index 000000000..bdb41ab20 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml @@ -0,0 +1,23 @@ +# This file defines prompts with defaults for dbt initialization. +# It is used when the `dbt init` command is invoked.
+# +fixed: + type: databricks +prompts: + host: + default: [DATABRICKS_HOST] + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + http_path: + hint: 'HTTP path of SQL warehouse to use' + default: /sql/2.0/warehouses/f00dcafe + catalog: + hint: 'initial catalog' + default: main + schema: + hint: 'personal schema where dbt will build objects during development, example: [USERNAME]' + threads: + hint: 'threads to use during development, 1 or more' + type: 'int' + default: 4 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt new file mode 100644 index 000000000..e6b861203 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt @@ -0,0 +1,3 @@ +## requirements-dev.txt: dependencies for local development. + +dbt-databricks>=1.8.0,<2.0.0 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml new file mode 100644 index 000000000..b522931f9 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml @@ -0,0 +1,43 @@ +resources: + jobs: + my_dbt_sql_job: + name: my_dbt_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - [USERNAME] + + + tasks: + - task_key: dbt + + dbt_task: + project_directory: ../ + # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ + commands: + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql new file mode 100644 index 000000000..e32736ceb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql @@ -0,0 +1,17 @@ + +-- This model file defines a materialized view called 'orders_daily' +-- +-- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the 
definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'materialized_view') }} + +select order_date, count(*) AS number_of_orders + +from {{ ref('orders_raw') }} + +-- During development, only process a smaller range of data +{% if target.name != 'prod' %} +where order_date >= '2019-08-01' and order_date < '2019-09-01' +{% endif %} + +group by order_date diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql new file mode 100644 index 000000000..8faf8f38b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql @@ -0,0 +1,16 @@ +-- This model file defines a streaming table called 'orders_raw' +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml new file mode 100644 index 000000000..c64f1bfce --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: orders_raw + description: "Raw ingested orders" + columns: + - name: customer_name + description: "The name of a customer" + data_tests: + - unique + - not_null + + - name: orders_daily + description: "Number of orders by day" + columns: + - name: order_date + description: "The date on which orders took place" + data_tests: + - unique + - not_null diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script new file mode 100644 index 000000000..3a2660de5 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/script @@ -0,0 +1,8 @@ +trace $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +cd output/my_dbt_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore diff --git a/acceptance/bundle/templates/default-python/classic/input.json b/acceptance/bundle/templates/default-python/classic/input.json new file 
mode 100644 index 000000000..2c4416c00 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "no" +} diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff new file mode 100644 index 000000000..6890badf0 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff @@ -0,0 +1,54 @@ +--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml ++++ output/my_default_python/resources/my_default_python.job.yml +@@ -17,4 +17,5 @@ + tasks: + - task_key: notebook_task ++ job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb +@@ -29,17 +30,21 @@ + depends_on: + - task_key: refresh_pipeline +- environment_key: default ++ job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main ++ libraries: ++ # By default we just include the .whl file generated for the my_default_python package. ++ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html ++ # for more information on how to add other libraries. ++ - whl: ../dist/*.whl + +- # A list of task execution environment specifications that can be referenced by tasks of this job. +- environments: +- - environment_key: default +- +- # Full documentation of this spec can be found at: +- # https://docs.databricks.com/api/workspace/jobs/create#environments-spec +- spec: +- client: "1" +- dependencies: +- - ../dist/*.whl ++ job_clusters: ++ - job_cluster_key: job_cluster ++ new_cluster: ++ spark_version: 15.4.x-scala2.12 ++ node_type_id: i3.xlarge ++ data_security_mode: SINGLE_USER ++ autoscale: ++ min_workers: 1 ++ max_workers: 4 +--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml ++++ output/my_default_python/resources/my_default_python.pipeline.yml +@@ -4,8 +4,7 @@ + my_default_python_pipeline: + name: my_default_python_pipeline +- ## Catalog is required for serverless compute +- catalog: main ++ ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: ++ # catalog: catalog_name + target: my_default_python_${bundle.target} +- serverless: true + libraries: + - notebook: diff --git a/acceptance/bundle/templates/default-python/classic/output.txt b/acceptance/bundle/templates/default-python/classic/output.txt new file mode 100644 index 000000000..930e756de --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. 
+ +>>> [CLI] bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md new file mode 100644 index 000000000..10f570bf4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/README.md @@ -0,0 +1,49 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. 
+ You can find that job by opening your workspace and clicking on **Workflows**.
+
+4. Similarly, to deploy a production copy, type:
+ ```
+ $ databricks bundle deploy --target prod
+ ```
+
+ Note that the default job from the template has a schedule that runs every day
+ (defined in resources/my_default_python.job.yml). The schedule
+ is paused when deploying in development mode (see
+ https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
+
+5. To run a job or pipeline, use the "run" command:
+ ```
+ $ databricks bundle run
+ ```
+6. Optionally, install the Databricks extension for Visual Studio Code for local development from
+ https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
+ virtual environment and set up Databricks Connect for running unit tests locally.
+ When not using these tools, consult your development environment's documentation
+ and/or the documentation for Databricks Connect for manually setting up your environment
+ (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
+
+7. For documentation on the Databricks Asset Bundles format used
+ for this project, and for CI/CD configuration, see
+ https://docs.databricks.com/dev-tools/bundles/index.html.
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml new file mode 100644 index 000000000..6080a368f --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/databricks.yml @@ -0,0 +1,29 @@
+# This is a Databricks asset bundle definition for my_default_python.
+# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
+bundle:
+ name: my_default_python
+ uuid: [UUID]
+
+include:
+ - resources/*.yml
+
+targets:
+ dev:
+ # The default target uses 'mode: development' to create a development copy.
+ # - Deployed resources get prefixed with '[dev my_user_name]'
+ # - Any job schedules and triggers are paused by default.
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
+ mode: development
+ default: true
+ workspace:
+ host: [DATABRICKS_URL]
+
+ prod:
+ mode: production
+ workspace:
+ host: [DATABRICKS_URL]
+ # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
+ permissions:
+ - user_name: [USERNAME]
+ level: CAN_MANAGE
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@
+# Fixtures
+
+This folder is reserved for fixtures, such as CSV files.
+ +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..7c11e143f --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,50 @@ +# The main job for my_default_python. 
+resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - [USERNAME] + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main + libraries: + # By default we just include the .whl file generated for the my_default_python package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..4176f765d --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,14 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + target: my_default_python_${bundle.target} + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..a12773d4e --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py new file mode 100644 index 000000000..548f1035e --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..8a02183e7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. 
+# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..472ccb219 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/classic/script b/acceptance/bundle/templates/default-python/classic/script new file mode 100644 index 000000000..7e5524065 --- /dev/null +++ b/acceptance/bundle/templates/default-python/classic/script @@ -0,0 +1,13 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore + +cd ../../ + +# Calculate the difference from the serverless template +diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt new file mode 100644 index 
000000000..30726013b --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -0,0 +1,22 @@ + +>>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/ +--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml ++++ output/my_default_python/resources/my_default_python.pipeline.yml +@@ -4,6 +4,5 @@ + my_default_python_pipeline: + name: my_default_python_pipeline +- ## Catalog is required for serverless compute +- catalog: main ++ catalog: customcatalog + target: my_default_python_${bundle.target} + serverless: true diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/script b/acceptance/bundle/templates/default-python/serverless-customcatalog/script new file mode 100644 index 000000000..2d1597c81 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/script @@ -0,0 +1,4 @@ +trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output +mv output/my_default_python/.gitignore output/my_default_python/out.gitignore +trace diff.py $TESTDIR/../serverless/output output/ +rm -fr output diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml new file mode 100644 index 000000000..4029057be --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/test.toml @@ -0,0 +1,8 @@ +[[Server]] +Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment" +Response.Body = '{"default_catalog_name": "customcatalog"}' + +[[Repls]] +# windows fix +Old = '\\' +New = '/' diff --git a/acceptance/bundle/templates/default-python/serverless/input.json b/acceptance/bundle/templates/default-python/serverless/input.json new file mode 100644 index 000000000..b1ae9a2ba --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + "serverless": "yes" +} diff --git a/acceptance/bundle/templates/default-python/serverless/output.txt b/acceptance/bundle/templates/default-python/serverless/output.txt new file mode 100644 index 000000000..930e756de --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. 
+See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md new file mode 100644 index 000000000..10f570bf4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/README.md @@ -0,0 +1,49 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. 
+ For example, the default template would deploy a job called
+ `[dev yourname] my_default_python_job` to your workspace.
+ You can find that job by opening your workspace and clicking on **Workflows**.
+
+4. Similarly, to deploy a production copy, type:
+ ```
+ $ databricks bundle deploy --target prod
+ ```
+
+ Note that the default job from the template has a schedule that runs every day
+ (defined in resources/my_default_python.job.yml). The schedule
+ is paused when deploying in development mode (see
+ https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
+
+5. To run a job or pipeline, use the "run" command:
+ ```
+ $ databricks bundle run
+ ```
+6. Optionally, install the Databricks extension for Visual Studio Code for local development from
+ https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
+ virtual environment and set up Databricks Connect for running unit tests locally.
+ When not using these tools, consult your development environment's documentation
+ and/or the documentation for Databricks Connect for manually setting up your environment
+ (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
+
+7. For documentation on the Databricks Asset Bundles format used
+ for this project, and for CI/CD configuration, see
+ https://docs.databricks.com/dev-tools/bundles/index.html.
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml new file mode 100644 index 000000000..6080a368f --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/databricks.yml @@ -0,0 +1,29 @@
+# This is a Databricks asset bundle definition for my_default_python.
+# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
+bundle:
+ name: my_default_python
+ uuid: [UUID]
+
+include:
+ - resources/*.yml
+
+targets:
+ dev:
+ # The default target uses 'mode: development' to create a development copy.
+ # - Deployed resources get prefixed with '[dev my_user_name]'
+ # - Any job schedules and triggers are paused by default.
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
+ mode: development
+ default: true
+ workspace:
+ host: [DATABRICKS_URL]
+
+ prod:
+ mode: production
+ workspace:
+ host: [DATABRICKS_URL]
+ # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
+ permissions:
+ - user_name: [USERNAME]
+ level: CAN_MANAGE
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@
+# Fixtures
+
+This folder is reserved for fixtures, such as CSV files.
+ +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..cc5aeb71c --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,45 @@ +# The main job for my_default_python. 
+resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - [USERNAME] + + tasks: + - task_key: notebook_task + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + environment_key: default + python_wheel_task: + package_name: my_default_python + entry_point: main + + # A list of task execution environment specifications that can be referenced by tasks of this job. + environments: + - environment_key: default + + # Full documentation of this spec can be found at: + # https://docs.databricks.com/api/workspace/jobs/create#environments-spec + spec: + client: "1" + dependencies: + - ../dist/*.whl diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..6dac62ded --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,15 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + ## Catalog is required for serverless compute + catalog: main + target: my_default_python_${bundle.target} + serverless: true + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..a12773d4e --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py new file mode 100644 index 000000000..548f1035e --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..8a02183e7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. 
+# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..472ccb219 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/serverless/script b/acceptance/bundle/templates/default-python/serverless/script new file mode 100644 index 000000000..e5fcb7741 --- /dev/null +++ b/acceptance/bundle/templates/default-python/serverless/script @@ -0,0 +1,8 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore diff --git a/acceptance/bundle/templates/default-sql/.ruff.toml b/acceptance/bundle/templates/default-sql/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git 
a/acceptance/bundle/templates/default-sql/input.json b/acceptance/bundle/templates/default-sql/input.json new file mode 100644 index 000000000..c728d25de --- /dev/null +++ b/acceptance/bundle/templates/default-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_sql", + "http_path": "/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, automatically use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/default-sql/output.txt b/acceptance/bundle/templates/default-sql/output.txt new file mode 100644 index 000000000..06eff962b --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output.txt @@ -0,0 +1,32 @@ + +>>> [CLI] bundle init default-sql --config-file ./input.json --output-dir output + +Welcome to the default SQL template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_default_sql' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev +Name: my_default_sql +Target: dev +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_sql/dev + +Validation OK! + +>>> [CLI] bundle validate -t prod +Name: my_default_sql +Target: prod +Workspace: + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/my_default_sql/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json new file mode 100644 index 000000000..8e1023465 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json new file mode 100644 index 000000000..c641abe39 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "main", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md new file mode 100644 index 000000000..67ded153f --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -0,0 +1,41 @@ +# my_default_sql + +The 'my_default_sql' project was generated by using the default-sql template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html + +2. Authenticate to your Databricks workspace (if you have not done so already): + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_sql_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. To run a job, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + +7. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml new file mode 100644 index 000000000..07562ce7a --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -0,0 +1,46 @@ +# This is a Databricks asset bundle definition for my_default_sql. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_sql + uuid: [UUID] + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ mode: development + default: true + workspace: + host: [DATABRICKS_URL] + variables: + warehouse_id: f00dcafe + catalog: main + schema: ${workspace.current_user.short_name} + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. + root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + variables: + warehouse_id: f00dcafe + catalog: main + schema: default + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/out.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/out.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml new file mode 100644 index 000000000..34d60e3d5 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml @@ -0,0 +1,38 @@ +# A job running SQL queries on a SQL warehouse +resources: + jobs: + my_default_sql_sql_job: + name: my_default_sql_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - [USERNAME] + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + - name: bundle_target + default: ${bundle.target} + + tasks: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_raw.sql + + - task_key: orders_daily + depends_on: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_daily.sql diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md new file mode 100644 index 000000000..5350d09cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks and SQL files. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb new file mode 100644 index 000000000..f3976c1de --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "exploration", + "widgets": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql new file mode 100644 index 000000000..ea7b80b54 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -0,0 +1,21 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REPLACE MATERIALIZED VIEW + orders_daily +AS SELECT + order_date, count(*) AS number_of_orders +FROM + orders_raw + +WHERE if( + {{bundle_target}} = "prod", + true, + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01' +) + +GROUP BY order_date diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql new file mode 100644 index 000000000..79b1354cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -0,0 +1,19 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REFRESH STREAMING TABLE + orders_raw +AS SELECT + customer_name, + DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date, + order_number +FROM STREAM READ_FILES( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script new file mode 100644 index 000000000..7ea0d863c --- /dev/null +++ b/acceptance/bundle/templates/default-sql/script @@ -0,0 +1,8 @@ +trace $CLI bundle init default-sql --config-file ./input.json --output-dir output + +cd output/my_default_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json new file mode 100644 index 000000000..748076c75 --- /dev/null +++ 
b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -0,0 +1,5 @@ +{ + "project_name": "my_jobs_as_code", + "include_notebook": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt new file mode 100644 index 000000000..2099dd498 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -0,0 +1,88 @@ + +>>> [CLI] bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): [DATABRICKS_URL] + +✨ Your new project has been created in the 'my_jobs_as_code' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> [CLI] bundle validate -t dev --output json +Warning: Ignoring Databricks CLI version constraint for development build. Required: >= 0.238.0, current: [DEV_VERSION] + +{ + "jobs": { + "my_jobs_as_code_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "email_notifications": { + "on_failure": [ + "[USERNAME]" + ] + }, + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 4, + "min_workers": 1 + }, + "data_security_mode": "SINGLE_USER", + "node_type_id": "i3.xlarge", + "spark_version": "15.4.x-scala2.12" + } + } + ], + "max_concurrent_runs": 4, + "name": "[dev [USERNAME]] my_jobs_as_code_job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": { + "dev": "[USERNAME]" + }, + "tasks": [ + { + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/my_jobs_as_code/dev/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "depends_on": [ + { + "task_key": "notebook_task" + } + ], + "job_cluster_key": "job_cluster", + "libraries": [ + { + "whl": "dist/*.whl" + } + ], + "python_wheel_task": { + "entry_point": "main", + "package_name": "my_jobs_as_code" + }, + "task_key": "main_task" + } + ], + "trigger": { + "pause_status": "PAUSED", + "periodic": { + "interval": 1, + "unit": "DAYS" + } + } + } + } +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md new file mode 100644 index 000000000..8c429c6e5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -0,0 +1,58 @@ +# my_jobs_as_code + +The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. 
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_jobs_as_code_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_jobs_as_code_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml new file mode 100644 index 000000000..9299c96e8 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml @@ -0,0 +1,49 @@ +# This is a Databricks asset bundle definition for my_jobs_as_code. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_jobs_as_code + uuid: [UUID] + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: [DATABRICKS_URL] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + run_as: + user_name: [USERNAME] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/out.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml new file mode 100644 index 000000000..28240e3ec --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_jobs_as_code" +requires-python = ">=3.10" +description = "wheel file based on my_jobs_as_code" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +[project.entry-points.packages] +main = "my_jobs_as_code.main:main" + +[tool.setuptools] +py-modules = ["resources", "my_jobs_as_code"] + +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. 
+ # "databricks-connect>=15.4,<15.5", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py new file mode 100644 index 000000000..be7254b80 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py @@ -0,0 +1,68 @@ +from databricks.bundles.jobs import Job + +""" +The main job for my_jobs_as_code. +""" + + +my_jobs_as_code_job = Job.from_dict( + { + "name": "my_jobs_as_code_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + "email_notifications": { + "on_failure": [ + "[USERNAME]", + ], + }, + "tasks": [ + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + { + "task_key": "main_task", + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "my_jobs_as_code", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the my_jobs_as_code package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "15.4.x-scala2.12", + "node_type_id": "i3.xlarge", + "data_security_mode": "SINGLE_USER", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py new file mode 100644 index 000000000..ba284ba82 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_jobs_as_code project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb new file mode 100644 index 000000000..227c7cc55 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_jobs_as_code import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py new file mode 100644 index 000000000..13e100ee2 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py @@ -0,0 +1,8 @@ +from my_jobs_as_code.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script new file mode 100644 index 000000000..08e48fc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -0,0 +1,13 @@ +trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +cd output/my_jobs_as_code + +# silence uv output because it's non-deterministic +uv sync -q + +trace $CLI bundle validate -t dev --output json | jq ".resources" + +rm -fr .venv resources/__pycache__ uv.lock my_jobs_as_code.egg-info + +# Do not affect this repository's git behaviour #2318 +mv .gitignore out.gitignore diff --git a/acceptance/bundle/templates/test.toml b/acceptance/bundle/templates/test.toml new file mode 100644 index 000000000..d0d289b5c --- /dev/null +++ b/acceptance/bundle/templates/test.toml @@ -0,0 +1,2 @@ +# At the moment, there are many differences across different envs w.r.t to catalog use, node type and so on. 
+Cloud = false diff --git a/acceptance/bundle/test.toml b/acceptance/bundle/test.toml new file mode 100644 index 000000000..0e8c8a384 --- /dev/null +++ b/acceptance/bundle/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = true diff --git a/acceptance/bundle/trampoline/warning_message/databricks.yml b/acceptance/bundle/trampoline/warning_message/databricks.yml new file mode 100644 index 000000000..c6125f5f0 --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message/databricks.yml @@ -0,0 +1,37 @@ +bundle: + name: trampoline_warning_message + +targets: + dev: + mode: development + default: true + + prod: + resources: + clusters: + interactive_cluster: + spark_version: 14.2.x-cpu-ml-scala2.12 + + +resources: + clusters: + interactive_cluster: + cluster_name: jobs-as-code-all-purpose-cluster + spark_version: 12.2.x-cpu-ml-scala2.12 + node_type_id: r5d.8xlarge + autotermination_minutes: 30 + autoscale: + min_workers: 1 + max_workers: 1 + driver_node_type_id: r5d.8xlarge + jobs: + whl: + name: "wheel-job" + tasks: + - task_key: test_task + python_wheel_task: + package_name: my_package + entry_point: my_module.my_function + existing_cluster_id: ${resources.clusters.interactive_cluster.id} + libraries: + - whl: ./dist/*.whl diff --git a/acceptance/bundle/trampoline/warning_message/output.txt b/acceptance/bundle/trampoline/warning_message/output.txt new file mode 100644 index 000000000..2f7d69e1f --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message/output.txt @@ -0,0 +1,22 @@ + +>>> errcode [CLI] bundle validate -t dev +Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information. + +Name: trampoline_warning_message +Target: dev +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/dev + +Found 1 error + +Exit code: 1 + +>>> errcode [CLI] bundle validate -t prod +Name: trampoline_warning_message +Target: prod +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/prod + +Validation OK! 
diff --git a/acceptance/bundle/trampoline/warning_message/script b/acceptance/bundle/trampoline/warning_message/script new file mode 100644 index 000000000..ffc151840 --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate -t dev +trace errcode $CLI bundle validate -t prod diff --git a/acceptance/bundle/trampoline/warning_message_with_new_spark/databricks.yml b/acceptance/bundle/trampoline/warning_message_with_new_spark/databricks.yml new file mode 100644 index 000000000..fa1a05dfb --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_new_spark/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: trampoline_warning_message_with_new_spark + +targets: + dev: + mode: development + default: true + +resources: + jobs: + whl: + name: "wheel-job" + tasks: + - task_key: test_task + python_wheel_task: + package_name: my_package + entry_point: my_module.my_function + existing_cluster_id: "some-test-cluster-id" + libraries: + - whl: ./dist/*.whl diff --git a/acceptance/bundle/trampoline/warning_message_with_new_spark/output.txt b/acceptance/bundle/trampoline/warning_message_with_new_spark/output.txt new file mode 100644 index 000000000..e311ab9fd --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_new_spark/output.txt @@ -0,0 +1,9 @@ + +>>> errcode [CLI] bundle validate +Name: trampoline_warning_message_with_new_spark +Target: dev +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_new_spark/dev + +Validation OK! diff --git a/acceptance/bundle/trampoline/warning_message_with_new_spark/script b/acceptance/bundle/trampoline/warning_message_with_new_spark/script new file mode 100644 index 000000000..9ecda517f --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_new_spark/script @@ -0,0 +1 @@ +trace errcode $CLI bundle validate diff --git a/acceptance/bundle/trampoline/warning_message_with_new_spark/test.toml b/acceptance/bundle/trampoline/warning_message_with_new_spark/test.toml new file mode 100644 index 000000000..4e52dbb5e --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_new_spark/test.toml @@ -0,0 +1,16 @@ +# Since we use existing cluster id value which is not available in cloud envs, we need to stub the request +# and run this test only locally +Cloud = false + +[[Server]] +Pattern = "GET /api/2.1/clusters/get" +Response.Body = ''' +{ + "cluster_id": "some-cluster-id", + "state": "RUNNING", + "spark_version": "13.3.x-scala2.12", + "node_type_id": "Standard_DS3_v2", + "driver_node_type_id": "Standard_DS3_v2", + "cluster_name": "some-cluster-name", + "spark_context_id": 12345 +}''' diff --git a/acceptance/bundle/trampoline/warning_message_with_old_spark/databricks.yml b/acceptance/bundle/trampoline/warning_message_with_old_spark/databricks.yml new file mode 100644 index 000000000..864c0f3fe --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_old_spark/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: trampoline_warning_message_with_old_spark + +targets: + dev: + mode: development + default: true + +resources: + jobs: + whl: + name: "wheel-job" + tasks: + - task_key: test_task + python_wheel_task: + package_name: my_package + entry_point: my_module.my_function + existing_cluster_id: "some-test-cluster-id" + libraries: + - whl: ./dist/*.whl diff --git a/acceptance/bundle/trampoline/warning_message_with_old_spark/output.txt b/acceptance/bundle/trampoline/warning_message_with_old_spark/output.txt new 
file mode 100644 index 000000000..551cd17bc --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_old_spark/output.txt @@ -0,0 +1,13 @@ + +>>> errcode [CLI] bundle validate +Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information. + +Name: trampoline_warning_message_with_old_spark +Target: dev +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_old_spark/dev + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/trampoline/warning_message_with_old_spark/script b/acceptance/bundle/trampoline/warning_message_with_old_spark/script new file mode 100644 index 000000000..9ecda517f --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_old_spark/script @@ -0,0 +1 @@ +trace errcode $CLI bundle validate diff --git a/acceptance/bundle/trampoline/warning_message_with_old_spark/test.toml b/acceptance/bundle/trampoline/warning_message_with_old_spark/test.toml new file mode 100644 index 000000000..09021bfc0 --- /dev/null +++ b/acceptance/bundle/trampoline/warning_message_with_old_spark/test.toml @@ -0,0 +1,16 @@ +# Since we use existing cluster id value which is not available in cloud envs, we need to stub the request +# and run this test only locally +Cloud = false + +[[Server]] +Pattern = "GET /api/2.1/clusters/get" +Response.Body = ''' +{ + "cluster_id": "some-cluster-id", + "state": "RUNNING", + "spark_version": "7.3.x-scala2.12", + "node_type_id": "Standard_DS3_v2", + "driver_node_type_id": "Standard_DS3_v2", + "cluster_name": "some-cluster-name", + "spark_context_id": 12345 +}''' diff --git a/acceptance/bundle/variables/arg-repeat/databricks.yml b/acceptance/bundle/variables/arg-repeat/databricks.yml new file mode 100644 index 000000000..377c6cfab --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: arg-repeat + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/arg-repeat/output.txt b/acceptance/bundle/variables/arg-repeat/output.txt new file mode 100644 index 000000000..4b97d70a1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/output.txt @@ -0,0 +1,18 @@ + +>>> errcode [CLI] bundle validate --var a=one -o json +{ + "a": { + "default": "hello", + "value": "one" + } +} + +>>> errcode [CLI] bundle validate --var a=one --var a=two +Error: failed to assign two to a: variable has already been assigned value: one + +Name: arg-repeat +Target: default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/arg-repeat/script b/acceptance/bundle/variables/arg-repeat/script new file mode 100644 index 000000000..3e03dbcb1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate --var a=one -o json | jq .variables +trace errcode $CLI bundle validate --var a=one --var a=two diff --git a/acceptance/bundle/variables/complex-cross-ref/databricks.yml b/acceptance/bundle/variables/complex-cross-ref/databricks.yml new file mode 100644 index 000000000..4459f44df --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: complex-cross-ref + +variables: + a: + default: + a_1: 500 + a_2: ${var.b.b_2} + b: + default: + b_1: ${var.a.a_1} + b_2: 2.5 diff --git 
a/acceptance/bundle/variables/complex-cross-ref/output.txt b/acceptance/bundle/variables/complex-cross-ref/output.txt new file mode 100644 index 000000000..f1b624d29 --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/output.txt @@ -0,0 +1,22 @@ +{ + "a": { + "default": { + "a_1": 500, + "a_2": 2.5 + }, + "value": { + "a_1": 500, + "a_2": 2.5 + } + }, + "b": { + "default": { + "b_1": 500, + "b_2": 2.5 + }, + "value": { + "b_1": 500, + "b_2": 2.5 + } + } +} diff --git a/acceptance/bundle/variables/complex-cross-ref/script b/acceptance/bundle/variables/complex-cross-ref/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/complex-cycle-self/databricks.yml b/acceptance/bundle/variables/complex-cycle-self/databricks.yml new file mode 100644 index 000000000..bb461795c --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle-self/output.txt b/acceptance/bundle/variables/complex-cycle-self/output.txt new file mode 100644 index 000000000..7447de349 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle-self/script b/acceptance/bundle/variables/complex-cycle-self/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-cycle/databricks.yml b/acceptance/bundle/variables/complex-cycle/databricks.yml new file mode 100644 index 000000000..9784a4e25 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.b} + b: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle/output.txt b/acceptance/bundle/variables/complex-cycle/output.txt new file mode 100644 index 000000000..7447de349 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle/script b/acceptance/bundle/variables/complex-cycle/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-simple/databricks.yml b/acceptance/bundle/variables/complex-simple/databricks.yml new file mode 100644 index 000000000..135ff86cf --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/databricks.yml @@ -0,0 +1,27 @@ +# This example works and properly merges resources.jobs.job1.job_clusters.new_cluster and ${var.cluster}. +# retaining num_workers, spark_version and overriding node_type_id. 
+bundle: + name: TestResolveComplexVariable + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + +resources: + jobs: + job1: + job_clusters: + - new_cluster: + node_type_id: "random" + spark_version: 13.3.x-scala2.12 + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-simple/output.txt b/acceptance/bundle/variables/complex-simple/output.txt new file mode 100644 index 000000000..16b0ec80f --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/output.txt @@ -0,0 +1,10 @@ +[ + { + "job_cluster_key": "", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_version": "13.3.x-scala2.12" + } + } +] diff --git a/acceptance/bundle/variables/complex-simple/script b/acceptance/bundle/variables/complex-simple/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex-transitive-deep/databricks.yml b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml new file mode 100644 index 000000000..1357c291a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: complex-transitive + +variables: + catalog: + default: hive_metastore + spark_conf_1: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog} + spark_conf: + default: ${var.spark_conf_1} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deep/output.txt b/acceptance/bundle/variables/complex-transitive-deep/output.txt new file mode 100644 index 000000000..29c41cda5 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/output.txt @@ -0,0 +1,3 @@ +{ + "spark.databricks.sql.initial.catalog.name": "hive_metastore" +} diff --git a/acceptance/bundle/variables/complex-transitive-deep/script b/acceptance/bundle/variables/complex-transitive-deep/script new file mode 100644 index 000000000..52bb08ed4 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/script @@ -0,0 +1,2 @@ +# Currently, this incorrectly outputs variable reference instead of resolved value +$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf' diff --git a/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml new file mode 100644 index 000000000..3f9bea464 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml @@ -0,0 +1,22 @@ +bundle: + name: complex-transitive-deeper + +variables: + catalog_1: + default: + name: hive_metastore + catalog: + default: ${var.catalog_1} + spark_conf: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog.name} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deeper/output.txt b/acceptance/bundle/variables/complex-transitive-deeper/output.txt new file mode 100644 index 
000000000..3bedbfb9a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/output.txt @@ -0,0 +1,7 @@ +Error: expected a map to index "variables.catalog.value.name", found string + +{ + "my_cluster": "${var.etl_cluster_config}" +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/complex-transitive-deeper/script b/acceptance/bundle/variables/complex-transitive-deeper/script new file mode 100644 index 000000000..d4fb404b1 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/script @@ -0,0 +1,2 @@ +# Currently, this errors instead of interpolating variables +$CLI bundle validate -o json | jq '.resources.clusters' diff --git a/acceptance/bundle/variables/complex-transitive/output.txt b/acceptance/bundle/variables/complex-transitive/output.txt index a031e0497..29c41cda5 100644 --- a/acceptance/bundle/variables/complex-transitive/output.txt +++ b/acceptance/bundle/variables/complex-transitive/output.txt @@ -1,3 +1,3 @@ { - "spark.databricks.sql.initial.catalog.name": "${var.catalog}" + "spark.databricks.sql.initial.catalog.name": "hive_metastore" } diff --git a/acceptance/bundle/variables/complex-with-var-reference/databricks.yml b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml new file mode 100644 index 000000000..104f9a470 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: TestResolveComplexVariableWithVarReference + +variables: + package_version: + default: "1.0.0" + cluster_libraries: + type: "complex" + default: + - pypi: + package: "cicd_template==${var.package_version}" + +resources: + jobs: + job1: + tasks: + - libraries: ${var.cluster_libraries} diff --git a/acceptance/bundle/variables/complex-with-var-reference/output.txt b/acceptance/bundle/variables/complex-with-var-reference/output.txt new file mode 100644 index 000000000..a5b792ac4 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/output.txt @@ -0,0 +1,12 @@ +[ + { + "libraries": [ + { + "pypi": { + "package": "cicd_template==1.0.0" + } + } + ], + "task_key": "" + } +] diff --git a/acceptance/bundle/variables/complex-with-var-reference/script b/acceptance/bundle/variables/complex-with-var-reference/script new file mode 100644 index 000000000..0f7353ad1 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.tasks diff --git a/acceptance/bundle/variables/complex-within-complex/databricks.yml b/acceptance/bundle/variables/complex-within-complex/databricks.yml new file mode 100644 index 000000000..f1d77289e --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/databricks.yml @@ -0,0 +1,34 @@ +# Does not work currently, explicitly disabled, even though it works if you remove 'type: "complex"' lines +# Also fails to merge clusters. 
+bundle: + name: TestResolveComplexVariableReferencesWithComplexVariablesError + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + spark_conf: "${var.spark_conf}" + spark_conf: + type: "complex" + value: + spark.executor.memory: "4g" + spark.executor.cores: "2" + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: + node_type_id: "random" + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-within-complex/output.txt b/acceptance/bundle/variables/complex-within-complex/output.txt new file mode 100644 index 000000000..72e6ef69a --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/output.txt @@ -0,0 +1,17 @@ +Warning: unknown field: node_type_id + at resources.jobs.job1.job_clusters[0] + in databricks.yml:25:11 + +[ + { + "job_cluster_key": "my_cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_conf": { + "spark.executor.cores": "2", + "spark.executor.memory": "4g" + } + } + } +] diff --git a/acceptance/bundle/variables/complex-within-complex/script b/acceptance/bundle/variables/complex-within-complex/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex/out.default.json b/acceptance/bundle/variables/complex/out.default.json index 6454562a6..0804ad588 100644 --- a/acceptance/bundle/variables/complex/out.default.json +++ b/acceptance/bundle/variables/complex/out.default.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/default/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/complex-variables/default/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex/out.dev.json b/acceptance/bundle/variables/complex/out.dev.json index cede5feb2..e93c2c297 100644 --- a/acceptance/bundle/variables/complex/out.dev.json +++ b/acceptance/bundle/variables/complex/out.dev.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/complex-variables/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex/output.txt b/acceptance/bundle/variables/complex/output.txt index ce295421f..f1d4c04cc 100644 --- a/acceptance/bundle/variables/complex/output.txt +++ b/acceptance/bundle/variables/complex/output.txt @@ -1,10 +1,10 @@ ->>> $CLI bundle validate -o json +>>> [CLI] bundle validate -o json >>> jq .resources.jobs.my_job.tasks[0].task_key out.default.json "task with spark version 13.2.x-scala2.11 and jar /path/to/jar" ->>> $CLI bundle validate -o json -t dev +>>> [CLI] bundle validate -o json -t dev >>> jq .resources.jobs.my_job.tasks[0].task_key out.dev.json "task with spark version 14.2.x-scala2.11 and jar /newpath/to/jar" diff --git a/acceptance/bundle/variables/complex_multiple_files/output.txt b/acceptance/bundle/variables/complex_multiple_files/output.txt index e87b8df11..433e6da0c 
100644 --- a/acceptance/bundle/variables/complex_multiple_files/output.txt +++ b/acceptance/bundle/variables/complex_multiple_files/output.txt @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables-multiple-files/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/complex-variables-multiple-files/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/cycle/databricks.yml b/acceptance/bundle/variables/cycle/databricks.yml new file mode 100644 index 000000000..b35196671 --- /dev/null +++ b/acceptance/bundle/variables/cycle/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: cycle + +variables: + a: + default: ${var.b} + b: + default: ${var.a} diff --git a/acceptance/bundle/variables/cycle/output.txt b/acceptance/bundle/variables/cycle/output.txt new file mode 100644 index 000000000..ea9c95cd4 --- /dev/null +++ b/acceptance/bundle/variables/cycle/output.txt @@ -0,0 +1,14 @@ +Error: cycle detected in field resolution: variables.a.default -> var.b -> var.a -> var.b + +{ + "a": { + "default": "${var.b}", + "value": "${var.b}" + }, + "b": { + "default": "${var.a}", + "value": "${var.a}" + } +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/cycle/script b/acceptance/bundle/variables/cycle/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/cycle/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/double_underscore/databricks.yml b/acceptance/bundle/variables/double_underscore/databricks.yml new file mode 100644 index 000000000..3bb15d42d --- /dev/null +++ b/acceptance/bundle/variables/double_underscore/databricks.yml @@ -0,0 +1,14 @@ +bundle: + name: double_underscore + +variables: + double__underscore: + description: "This is a variable with a double underscore" + default: "default" + +resources: + jobs: + test_job: + name: "test" + tasks: + - task_key: "test ${var.double__underscore}" diff --git a/acceptance/bundle/variables/double_underscore/output.txt b/acceptance/bundle/variables/double_underscore/output.txt new file mode 100644 index 000000000..0124f5442 --- /dev/null +++ b/acceptance/bundle/variables/double_underscore/output.txt @@ -0,0 +1,7 @@ + +>>> [CLI] bundle validate -o json +[ + { + "task_key": "test default" + } +] diff --git a/acceptance/bundle/variables/double_underscore/script b/acceptance/bundle/variables/double_underscore/script new file mode 100644 index 000000000..a7394df77 --- /dev/null +++ b/acceptance/bundle/variables/double_underscore/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq .resources.jobs.test_job.tasks diff --git a/acceptance/bundle/variables/empty/output.txt b/acceptance/bundle/variables/empty/output.txt index c3f0af130..cbd0f1989 100644 --- a/acceptance/bundle/variables/empty/output.txt +++ b/acceptance/bundle/variables/empty/output.txt @@ -1,10 +1,10 @@ -Error: no value assigned to required variable a. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_a environment variable +Error: no value assigned to required variable a. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_a environment variable, or in .databricks/bundle//variable-overrides.json file Name: empty${var.a} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/empty${var.a}/default + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/empty${var.a}/default Found 1 error diff --git a/acceptance/bundle/variables/env_overrides/databricks.yml b/acceptance/bundle/variables/env_overrides/databricks.yml index 560513bc3..e5fc7fcc4 100644 --- a/acceptance/bundle/variables/env_overrides/databricks.yml +++ b/acceptance/bundle/variables/env_overrides/databricks.yml @@ -18,12 +18,13 @@ variables: description: variable with lookup lookup: cluster_policy: wrong-cluster-policy + + result: + default: ${var.a} ${var.b} + bundle: name: test bundle -workspace: - profile: ${var.a} ${var.b} - targets: env-with-single-variable-override: variables: diff --git a/acceptance/bundle/variables/env_overrides/output.txt b/acceptance/bundle/variables/env_overrides/output.txt index e8fb99938..93b3b6716 100644 --- a/acceptance/bundle/variables/env_overrides/output.txt +++ b/acceptance/bundle/variables/env_overrides/output.txt @@ -1,27 +1,27 @@ ->>> $CLI bundle validate -t env-with-single-variable-override -o json +>>> [CLI] bundle validate -t env-with-single-variable-override -o json "default-a dev-b" ->>> $CLI bundle validate -t env-with-two-variable-overrides -o json +>>> [CLI] bundle validate -t env-with-two-variable-overrides -o json "prod-a prod-b" ->>> BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json +>>> BUNDLE_VAR_b=env-var-b [CLI] bundle validate -t env-with-two-variable-overrides -o json "prod-a env-var-b" ->>> errcode $CLI bundle validate -t env-missing-a-required-variable-assignment -Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable +>>> errcode [CLI] bundle validate -t env-missing-a-required-variable-assignment +Error: no value assigned to required variable b. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file Name: test bundle Target: env-missing-a-required-variable-assignment Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/test bundle/env-missing-a-required-variable-assignment + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test bundle/env-missing-a-required-variable-assignment Found 1 error Exit code: 1 ->>> errcode $CLI bundle validate -t env-using-an-undefined-variable +>>> errcode [CLI] bundle validate -t env-using-an-undefined-variable Error: variable c is not defined but is assigned a value Name: test bundle @@ -30,11 +30,12 @@ Found 1 error Exit code: 1 ->>> $CLI bundle validate -t env-overrides-lookup -o json +>>> [CLI] bundle validate -t env-overrides-lookup -o json { "a": "default-a", "b": "prod-b", "d": "4321", "e": "1234", - "f": "9876" + "f": "9876", + "result": "default-a prod-b" } diff --git a/acceptance/bundle/variables/env_overrides/script b/acceptance/bundle/variables/env_overrides/script index 30919fd8a..3965d1564 100644 --- a/acceptance/bundle/variables/env_overrides/script +++ b/acceptance/bundle/variables/env_overrides/script @@ -1,6 +1,6 @@ -trace $CLI bundle validate -t env-with-single-variable-override -o json | jq .workspace.profile -trace $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile -trace BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile +trace $CLI bundle validate -t env-with-single-variable-override -o json | jq .variables.result.value +trace $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .variables.result.value +trace BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .variables.result.value trace errcode $CLI bundle validate -t env-missing-a-required-variable-assignment trace errcode $CLI bundle validate -t env-using-an-undefined-variable trace $CLI bundle validate -t env-overrides-lookup -o json | jq '.variables | map_values(.value)' diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json new file mode 100644 index 000000000..602567a68 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json @@ -0,0 +1,5 @@ +{ + "cluster_key": { + "node_type_id": "Standard_DS3_v2" + } +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json new file mode 100644 index 000000000..3a865e120 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json @@ -0,0 +1,7 @@ +{ + "cluster": { + "node_type_id": "Standard_DS3_v2" + }, + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json @@ -0,0 +1 @@ +foo diff --git 
a/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json new file mode 100644 index 000000000..1ea719446 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster": "mlops_stacks-cluster" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json new file mode 100644 index 000000000..686d68548 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster_key": "mlops_stacks-cluster-from-file" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json new file mode 100644 index 000000000..86166408e --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json @@ -0,0 +1,4 @@ +{ + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json new file mode 100644 index 000000000..de140ba36 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json @@ -0,0 +1,3 @@ +[ + "foo" +] diff --git a/acceptance/bundle/variables/file-defaults/.gitignore b/acceptance/bundle/variables/file-defaults/.gitignore new file mode 100644 index 000000000..bd1711fd1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.gitignore @@ -0,0 +1 @@ +!.databricks diff --git a/acceptance/bundle/variables/file-defaults/databricks.yml b/acceptance/bundle/variables/file-defaults/databricks.yml new file mode 100644 index 000000000..5838843e1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/databricks.yml @@ -0,0 +1,53 @@ +bundle: + name: TestResolveVariablesFromFile + +variables: + cluster: + type: "complex" + cluster_key: + cluster_workers: + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: ${var.cluster_key} + new_cluster: + node_type_id: "${var.cluster.node_type_id}" + num_workers: ${var.cluster_workers} + +targets: + default: + default: true + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + without_defaults: + + complex_to_string: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + string_to_complex: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + wrong_file_structure: + + invalid_json: + + with_value: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: cluster_key_value diff --git a/acceptance/bundle/variables/file-defaults/output.txt b/acceptance/bundle/variables/file-defaults/output.txt new file mode 100644 index 000000000..234ddcbbd --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/output.txt @@ -0,0 +1,113 @@ + +=== variable file +>>> [CLI] bundle validate -o json +{ + "job_cluster_key": 
"mlops_stacks-cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and variable flag +>>> [CLI] bundle validate -o json --var=cluster_key=mlops_stacks-cluster-overriden +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and environment variable +>>> BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden [CLI] bundle validate -o json +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable has value in config file +>>> [CLI] bundle validate -o json --target with_value +{ + "job_cluster_key": "mlops_stacks-cluster-from-file", + "new_cluster": { + "node_type_id": "default", + "num_workers": 1 + } +} + +=== file cannot be parsed +>>> errcode [CLI] bundle validate -o json --target invalid_json +Error: failed to parse variables file [TMPDIR]/.databricks/bundle/invalid_json/variable-overrides.json: error decoding JSON at :0:0: invalid character 'o' in literal false (expecting 'a') + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== file has wrong structure +>>> errcode [CLI] bundle validate -o json --target wrong_file_structure +Error: failed to parse variables file [TMPDIR]/.databricks/bundle/wrong_file_structure/variable-overrides.json: invalid format + +Variables file must be a JSON object with the following format: +{"var1": "value1", "var2": "value2"} + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== file has variable that is complex but default is string +>>> errcode [CLI] bundle validate -o json --target complex_to_string +Error: variable cluster_key is not of type complex, but the value in the variable file is a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== file has variable that is string but default is complex +>>> errcode [CLI] bundle validate -o json --target string_to_complex +Error: variable cluster is of type complex, but the value in the variable file is not a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== variable is required but it's not provided in the file +>>> errcode [CLI] bundle validate -o json --target without_defaults +Error: no value assigned to required variable cluster. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_cluster environment variable, or in .databricks/bundle//variable-overrides.json file + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} diff --git a/acceptance/bundle/variables/file-defaults/script b/acceptance/bundle/variables/file-defaults/script new file mode 100644 index 000000000..8e6fd0d75 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/script @@ -0,0 +1,30 @@ +cluster_expr=".resources.jobs.job1.job_clusters[0]" + +# defaults from variable file, see .databricks/bundle//variable-overrides.json + +title "variable file" +trace $CLI bundle validate -o json | jq $cluster_expr + +title "variable file and variable flag" +trace $CLI bundle validate -o json --var="cluster_key=mlops_stacks-cluster-overriden" | jq $cluster_expr + +title "variable file and environment variable" +trace BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json | jq $cluster_expr + +title "variable has value in config file" +trace $CLI bundle validate -o json --target with_value | jq $cluster_expr + +title "file cannot be parsed" +trace errcode $CLI bundle validate -o json --target invalid_json | jq $cluster_expr + +title "file has wrong structure" +trace errcode $CLI bundle validate -o json --target wrong_file_structure | jq $cluster_expr + +title "file has variable that is complex but default is string" +trace errcode $CLI bundle validate -o json --target complex_to_string | jq $cluster_expr + +title "file has variable that is string but default is complex" +trace errcode $CLI bundle validate -o json --target string_to_complex | jq $cluster_expr + +title "variable is required but it's not provided in the file" +trace errcode $CLI bundle validate -o json --target without_defaults | jq $cluster_expr diff --git a/acceptance/bundle/variables/file-defaults/test.toml b/acceptance/bundle/variables/file-defaults/test.toml new file mode 100644 index 000000000..da8854775 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/test.toml @@ -0,0 +1,4 @@ +# Fix for Windows +[[Repls]] +Old = '\\' +New = '/' diff --git a/acceptance/bundle/variables/git-branch/databricks.yml b/acceptance/bundle/variables/git-branch/databricks.yml new file mode 100644 index 000000000..7cf210722 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: git + git: + # This is currently not supported + branch: ${var.deployment_branch} + +variables: + deployment_branch: + # By setting deployment_branch to "" we set bundle.git.branch to "", which is the same as unsetting it. + # This should make the CLI read the branch from git and update bundle.git.branch accordingly. It should + # also set bundle.git.inferred to true. + default: "" + +targets: + prod: + default: true + dev: + variables: + deployment_branch: dev-branch diff --git a/acceptance/bundle/variables/git-branch/output.txt b/acceptance/bundle/variables/git-branch/output.txt new file mode 100644 index 000000000..68f27a3f1 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/output.txt @@ -0,0 +1,90 @@ + +>>> [CLI] bundle validate -o json +{ + "bundle": { + "environment": "prod", + "git": { + "actual_branch": "main", + "branch": "", + "bundle_root_path": "." + }, + "name": "git", + "target": "prod", + "terraform": { + "exec_path": "[TERRAFORM]" + } + }, + "sync": { + "paths": [ + "."
+ ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "", + "value": "" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/git/prod/artifacts", + "file_path": "/Workspace/Users/[USERNAME]/.bundle/git/prod/files", + "resource_path": "/Workspace/Users/[USERNAME]/.bundle/git/prod/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/git/prod", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/git/prod/state" + } +} + +>>> [CLI] bundle validate +Name: git +Target: prod +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/git/prod + +Validation OK! + +>>> [CLI] bundle validate -o json -t dev +{ + "bundle": { + "environment": "dev", + "git": { + "actual_branch": "main", + "branch": "dev-branch", + "bundle_root_path": "." + }, + "name": "git", + "target": "dev", + "terraform": { + "exec_path": "[TERRAFORM]" + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "dev-branch", + "value": "dev-branch" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/git/dev/artifacts", + "file_path": "/Workspace/Users/[USERNAME]/.bundle/git/dev/files", + "resource_path": "/Workspace/Users/[USERNAME]/.bundle/git/dev/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/git/dev", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/git/dev/state" + } +} + +>>> [CLI] bundle validate -t dev +Name: git +Target: dev +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/git/dev + +Validation OK! diff --git a/acceptance/bundle/variables/git-branch/script b/acceptance/bundle/variables/git-branch/script new file mode 100644 index 000000000..8f99cc01b --- /dev/null +++ b/acceptance/bundle/variables/git-branch/script @@ -0,0 +1,6 @@ +git-repo-init +trace $CLI bundle validate -o json | jq 'del(.workspace.current_user, .bundle.git.commit)' +trace $CLI bundle validate +trace $CLI bundle validate -o json -t dev | jq 'del(.workspace.current_user, .bundle.git.commit)' +trace $CLI bundle validate -t dev +rm -fr .git diff --git a/acceptance/bundle/variables/host/output.txt b/acceptance/bundle/variables/host/output.txt index 89342908c..df0a4527a 100644 --- a/acceptance/bundle/variables/host/output.txt +++ b/acceptance/bundle/variables/host/output.txt @@ -1,5 +1,5 @@ ->>> errcode $CLI bundle validate -o json +>>> errcode [CLI] bundle validate -o json Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name { @@ -23,9 +23,10 @@ Error: failed during request visitor: parse "https://${var.host}": invalid chara "host": "${var.host}" } } + Exit code: 1 ->>> errcode $CLI bundle validate +>>> errcode [CLI] bundle validate Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name Name: host diff --git a/acceptance/bundle/variables/prepend-workspace-var/databricks.yml b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml new file mode 100644 index 000000000..c843752f8 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml @@ -0,0 +1,24 @@ +workspace: + profile: profile_name + root_path: ${var.workspace_root}/path/to/root + +variables: + workspace_root: + description: "root directory in the Databricks workspace to store the asset bundle and associated artifacts" + default: /Users/${workspace.current_user.userName} + +targets: + dev: + default: true + prod: + variables: + workspace_root: /Shared + 
+resources: + jobs: + my_job: + tasks: + - existing_cluster_id: 500 + python_wheel_task: + named_parameters: + conf-file: "${workspace.file_path}/path/to/config.yaml" diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt new file mode 100644 index 000000000..a48a58fba --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -0,0 +1,62 @@ +/Workspace should be prepended on all paths, but it is not the case: +{ + "bundle": { + "environment": "dev", + "git": { + "bundle_root_path": "." + }, + "target": "dev", + "terraform": { + "exec_path": "[TERRAFORM]" + } + }, + "resources": { + "jobs": { + "my_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Users/[USERNAME]/path/to/root/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "existing_cluster_id": "500", + "python_wheel_task": { + "named_parameters": { + "conf-file": "/Users/[USERNAME]/path/to/root/files/path/to/config.yaml" + } + }, + "task_key": "" + } + ] + } + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "workspace_root": { + "default": "/Users/[USERNAME]", + "description": "root directory in the Databricks workspace to store the asset bundle and associated artifacts", + "value": "/Users/[USERNAME]" + } + }, + "workspace": { + "artifact_path": "/Users/[USERNAME]/path/to/root/artifacts", + "file_path": "/Users/[USERNAME]/path/to/root/files", + "profile": "profile_name", + "resource_path": "/Users/[USERNAME]/path/to/root/resources", + "root_path": "/Users/[USERNAME]/path/to/root", + "state_path": "/Users/[USERNAME]/path/to/root/state" + } +} diff --git a/acceptance/bundle/variables/prepend-workspace-var/script b/acceptance/bundle/variables/prepend-workspace-var/script new file mode 100644 index 000000000..e30ffb9c4 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/script @@ -0,0 +1,2 @@ +echo /Workspace should be prepended on all paths, but it is not the case: #2181 +$CLI bundle validate -o json | jq 'del(.workspace.current_user)' diff --git a/acceptance/bundle/variables/resolve-builtin/output.txt b/acceptance/bundle/variables/resolve-builtin/output.txt index 2f58abd8a..f37a2a19e 100644 --- a/acceptance/bundle/variables/resolve-builtin/output.txt +++ b/acceptance/bundle/variables/resolve-builtin/output.txt @@ -1,9 +1,5 @@ { "artifact_path": "TestResolveVariableReferences/bar/artifacts", - "current_user": { - "short_name": "tester", - "userName": "tester@databricks.com" - }, "file_path": "TestResolveVariableReferences/bar/baz", "resource_path": "TestResolveVariableReferences/bar/resources", "root_path": "TestResolveVariableReferences/bar", diff --git a/acceptance/bundle/variables/resolve-builtin/script b/acceptance/bundle/variables/resolve-builtin/script index fefd9abe6..558d0a7ca 100644 --- a/acceptance/bundle/variables/resolve-builtin/script +++ b/acceptance/bundle/variables/resolve-builtin/script @@ -1 +1 @@ -$CLI bundle validate -o json | jq .workspace +$CLI bundle validate -o json | jq .workspace | jq 'del(.current_user)' diff --git a/acceptance/bundle/variables/resolve-nonstrings/databricks.yml b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml new file mode 100644 index 000000000..a02c78a7e --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml @@ -0,0 +1,23 @@ +bundle: + name: 
TestResolveVariableReferencesForPrimitiveNonStringFields + +variables: + no_alert_for_canceled_runs: {} + no_alert_for_skipped_runs: {} + min_workers: {} + max_workers: {} + spot_bid_max_price: {} + +resources: + jobs: + job1: + notification_settings: + no_alert_for_canceled_runs: ${var.no_alert_for_canceled_runs} + no_alert_for_skipped_runs: ${var.no_alert_for_skipped_runs} + tasks: + - new_cluster: + autoscale: + min_workers: ${var.min_workers} + max_workers: ${var.max_workers} + azure_attributes: + spot_bid_max_price: ${var.spot_bid_max_price} diff --git a/acceptance/bundle/variables/resolve-nonstrings/output.txt b/acceptance/bundle/variables/resolve-nonstrings/output.txt new file mode 100644 index 000000000..951ad7a0d --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/output.txt @@ -0,0 +1,52 @@ +{ + "variables": { + "max_workers": { + "value": "2" + }, + "min_workers": { + "value": "1" + }, + "no_alert_for_canceled_runs": { + "value": "true" + }, + "no_alert_for_skipped_runs": { + "value": "false" + }, + "spot_bid_max_price": { + "value": "0.5" + } + }, + "jobs": { + "job1": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/TestResolveVariableReferencesForPrimitiveNonStringFields/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "notification_settings": { + "no_alert_for_canceled_runs": true, + "no_alert_for_skipped_runs": false + }, + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "new_cluster": { + "autoscale": { + "max_workers": 2, + "min_workers": 1 + }, + "azure_attributes": { + "spot_bid_max_price": 0.5 + } + }, + "task_key": "" + } + ] + } + } +} diff --git a/acceptance/bundle/variables/resolve-nonstrings/script b/acceptance/bundle/variables/resolve-nonstrings/script new file mode 100644 index 000000000..cb9e45b61 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/script @@ -0,0 +1,4 @@ +export BUNDLE_VAR_no_alert_for_skipped_runs=false +export BUNDLE_VAR_max_workers=2 +export BUNDLE_VAR_min_workers=3 # shadowed by --var below +$CLI bundle validate -o json --var no_alert_for_canceled_runs=true --var min_workers=1 --var spot_bid_max_price=0.5 | jq '{ variables, jobs: .resources.jobs }' diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml new file mode 100644 index 000000000..6a45de330 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml @@ -0,0 +1,9 @@ +bundle: + name: TestResolveVariableReferencesToBundleVariables + +workspace: + root_path: "${bundle.name}/${var.foo}" + +variables: + foo: + value: "bar" diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt new file mode 100644 index 000000000..fb828d826 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt @@ -0,0 +1,7 @@ +{ + "artifact_path": "TestResolveVariableReferencesToBundleVariables/bar/artifacts", + "file_path": "TestResolveVariableReferencesToBundleVariables/bar/files", + "resource_path": "TestResolveVariableReferencesToBundleVariables/bar/resources", + "root_path": "TestResolveVariableReferencesToBundleVariables/bar", + "state_path": "TestResolveVariableReferencesToBundleVariables/bar/state" +} diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/script 
b/acceptance/bundle/variables/resolve-vars-in-root-path/script new file mode 100644 index 000000000..558d0a7ca --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .workspace | jq 'del(.current_user)' diff --git a/acceptance/bundle/variables/test.toml b/acceptance/bundle/variables/test.toml new file mode 100644 index 000000000..8ed716ad0 --- /dev/null +++ b/acceptance/bundle/variables/test.toml @@ -0,0 +1,3 @@ +# The tests here intend to test variable interpolation via "bundle validate". +# Even though "bundle validate" does a few API calls, that's not the focus there. +Cloud = false diff --git a/acceptance/bundle/variables/vanilla/output.txt b/acceptance/bundle/variables/vanilla/output.txt index 69b358a3f..3958c39b9 100644 --- a/acceptance/bundle/variables/vanilla/output.txt +++ b/acceptance/bundle/variables/vanilla/output.txt @@ -1,15 +1,15 @@ ->>> BUNDLE_VAR_b=def $CLI bundle validate -o json +>>> BUNDLE_VAR_b=def [CLI] bundle validate -o json "abc def" ->>> errcode $CLI bundle validate -Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable +>>> errcode [CLI] bundle validate +Error: no value assigned to required variable b. Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file Name: ${var.a} ${var.b} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/${var.a} ${var.b}/default + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${var.a} ${var.b}/default Found 1 error diff --git a/acceptance/bundle/variables/variable_overrides_in_target/output.txt b/acceptance/bundle/variables/variable_overrides_in_target/output.txt index de193f5b6..d112cf2de 100644 --- a/acceptance/bundle/variables/variable_overrides_in_target/output.txt +++ b/acceptance/bundle/variables/variable_overrides_in_target/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle validate -o json -t use-default-variable-values +>>> [CLI] bundle validate -o json -t use-default-variable-values { "pipelines": { "my_pipeline": { @@ -12,7 +12,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/use-default-variable-values/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/foobar/use-default-variable-values/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -20,7 +20,7 @@ } } ->>> $CLI bundle validate -o json -t override-string-variable +>>> [CLI] bundle validate -o json -t override-string-variable { "pipelines": { "my_pipeline": { @@ -33,7 +33,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-string-variable/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/foobar/override-string-variable/state/metadata.json" }, "name": "overridden_string", "permissions": [] @@ -41,7 +41,7 @@ } } ->>> $CLI bundle validate -o json -t override-int-variable +>>> [CLI] bundle validate -o json -t override-int-variable { "pipelines": { "my_pipeline": { @@ -54,7 +54,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-int-variable/state/metadata.json" + "metadata_file_path": 
"/Workspace/Users/[USERNAME]/.bundle/foobar/override-int-variable/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -62,7 +62,7 @@ } } ->>> $CLI bundle validate -o json -t override-both-bool-and-string-variables +>>> [CLI] bundle validate -o json -t override-both-bool-and-string-variables { "pipelines": { "my_pipeline": { @@ -75,7 +75,7 @@ "continuous": false, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" }, "name": "overridden_string", "permissions": [] diff --git a/acceptance/cmd/workspace/apps/input.json b/acceptance/cmd/workspace/apps/input.json new file mode 100644 index 000000000..76f3e589c --- /dev/null +++ b/acceptance/cmd/workspace/apps/input.json @@ -0,0 +1,14 @@ +{ + "description": "My app description.", + "resources": [ + { + "name": "api-key", + "description": "API key for external service.", + "secret": { + "scope": "my-scope", + "key": "my-key", + "permission": "READ" + } + } + ] +} diff --git a/acceptance/cmd/workspace/apps/out.requests.txt b/acceptance/cmd/workspace/apps/out.requests.txt new file mode 100644 index 000000000..04891dc74 --- /dev/null +++ b/acceptance/cmd/workspace/apps/out.requests.txt @@ -0,0 +1,19 @@ +{ + "method": "PATCH", + "path": "/api/2.0/apps/test-name", + "body": { + "description": "My app description.", + "name": "", + "resources": [ + { + "description": "API key for external service.", + "name": "api-key", + "secret": { + "key": "my-key", + "permission": "READ", + "scope": "my-scope" + } + } + ] + } +} diff --git a/acceptance/cmd/workspace/apps/output.txt b/acceptance/cmd/workspace/apps/output.txt new file mode 100644 index 000000000..4d9f80f44 --- /dev/null +++ b/acceptance/cmd/workspace/apps/output.txt @@ -0,0 +1,49 @@ + +=== Apps update with correct input +>>> [CLI] apps update test-name --json @input.json +{ + "app_status": { + "message":"Application is running.", + "state":"DEPLOYING" + }, + "compute_status": { + "message":"App compute is active.", + "state":"ERROR" + }, + "description":"My app description.", + "id":"12345", + "name":"test-name", + "resources": [ + { + "description":"API key for external service.", + "name":"api-key", + "secret": { + "key":"my-key", + "permission":"READ", + "scope":"my-scope" + } + } + ], + "url":"test-name-123.cloud.databricksapps.com" +} + +=== Apps update with missing parameter +>>> [CLI] apps update --json @input.json +Error: accepts 1 arg(s), received 0 + +Usage: + databricks apps update NAME [flags] + +Flags: + --description string The description of the app. 
+ -h, --help help for update + --json JSON either inline JSON string or @path/to/file.json with request body (default JSON (0 bytes)) + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + + +Exit code: 1 diff --git a/acceptance/cmd/workspace/apps/script b/acceptance/cmd/workspace/apps/script new file mode 100644 index 000000000..221ffc4c0 --- /dev/null +++ b/acceptance/cmd/workspace/apps/script @@ -0,0 +1,5 @@ +title "Apps update with correct input" +trace $CLI apps update test-name --json @input.json + +title "Apps update with missing parameter" +trace $CLI apps update --json @input.json diff --git a/acceptance/cmd/workspace/apps/test.toml b/acceptance/cmd/workspace/apps/test.toml new file mode 100644 index 000000000..972ae1c50 --- /dev/null +++ b/acceptance/cmd/workspace/apps/test.toml @@ -0,0 +1,30 @@ +RecordRequests = true + +[[Server]] +Pattern = "PATCH /api/2.0/apps/test-name" +Response.Body = ''' +{ + "name": "test-name", + "description": "My app description.", + "compute_status": { + "state": "ERROR", + "message": "App compute is active." + }, + "app_status": { + "state": "DEPLOYING", + "message": "Application is running." + }, + "url": "test-name-123.cloud.databricksapps.com", + "resources": [ + { + "name": "api-key", + "description": "API key for external service.", + "secret": { + "scope": "my-scope", + "key": "my-key", + "permission": "READ" + } + } + ], + "id": "12345" +}''' diff --git a/acceptance/cmd_server_test.go b/acceptance/cmd_server_test.go new file mode 100644 index 000000000..dc48a85d7 --- /dev/null +++ b/acceptance/cmd_server_test.go @@ -0,0 +1,74 @@ +package acceptance_test + +import ( + "context" + "encoding/json" + "os" + "strings" + "testing" + + "github.com/databricks/cli/internal/testcli" + "github.com/databricks/cli/libs/testserver" + "github.com/stretchr/testify/require" +) + +func StartCmdServer(t *testing.T) *testserver.Server { + server := testserver.New(t) + server.Handle("GET", "/", func(r testserver.Request) any { + q := r.URL.Query() + args := strings.Split(q.Get("args"), " ") + + var env map[string]string + require.NoError(t, json.Unmarshal([]byte(q.Get("env")), &env)) + + for key, val := range env { + defer Setenv(t, key, val)() + } + + defer Chdir(t, q.Get("cwd"))() + + c := testcli.NewRunner(t, context.Background(), args...) + c.Verbose = false + stdout, stderr, err := c.Run() + result := map[string]any{ + "stdout": stdout.String(), + "stderr": stderr.String(), + } + exitcode := 0 + if err != nil { + exitcode = 1 + } + result["exitcode"] = exitcode + return result + }) + return server +} + +// Chdir variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike testutil.Chdir which switches back only when tests end. +func Chdir(t *testing.T, cwd string) func() { + require.NotEmpty(t, cwd) + prevDir, err := os.Getwd() + require.NoError(t, err) + err = os.Chdir(cwd) + require.NoError(t, err) + return func() { + _ = os.Chdir(prevDir) + } +} + +// Setenv variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike t.Setenv which switches back only when tests end. 
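+// Illustrative usage (the key and value here are hypothetical), mirroring the deferred calls in StartCmdServer above: +// +//	defer Setenv(t, "BUNDLE_VAR_cluster_key", "my-cluster")()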
+func Setenv(t *testing.T, key, value string) func() { + prevVal, exists := os.LookupEnv(key) + + require.NoError(t, os.Setenv(key, value)) + + return func() { + if exists { + _ = os.Setenv(key, prevVal) + } else { + _ = os.Unsetenv(key) + } + } +} diff --git a/acceptance/config_test.go b/acceptance/config_test.go new file mode 100644 index 000000000..4edfee69d --- /dev/null +++ b/acceptance/config_test.go @@ -0,0 +1,133 @@ +package acceptance_test + +import ( + "os" + "path/filepath" + "slices" + "strings" + "testing" + + "dario.cat/mergo" + "github.com/BurntSushi/toml" + "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/cli/libs/testserver" + "github.com/stretchr/testify/require" +) + +const configFilename = "test.toml" + +type TestConfig struct { + // Place to describe what's wrong with this test. Does not affect how the test is run. + Badness *string + + // Which OSes the test is enabled on. Each string is compared against runtime.GOOS. + // If absent, default to true. + GOOS map[string]bool + + // If true, run this test when running locally with a testserver + Local *bool + + // If true, run this test when running with cloud env configured + Cloud *bool + + // List of additional replacements to apply on this test. + // Old is a regexp, New is a replacement expression. + Repls []testdiff.Replacement + + // List of server stubs to load. Example configuration: + // + // [[Server]] + // Pattern = "POST /api/2.1/jobs/create" + // Response.Body = ''' + // { + // "job_id": 1111 + // } + // ''' + Server []ServerStub + + // Record the requests made to the server and write them as output to + // out.requests.txt + RecordRequests *bool + + // List of request headers to include when recording requests. + IncludeRequestHeaders []string +} + +type ServerStub struct { + // The HTTP method and path to match. Examples: + // 1. /api/2.0/clusters/list (matches all methods) + // 2. GET /api/2.0/clusters/list + Pattern string + + // The response body to return. + Response testserver.Response +} + +// FindConfigs finds all the config relevant for this test, +// ordered from the most outermost (at acceptance/) to current test directory (identified by dir). +// Argument dir must be a relative path from the root of acceptance tests (/acceptance/). +func FindConfigs(t *testing.T, dir string) []string { + configs := []string{} + for { + path := filepath.Join(dir, configFilename) + _, err := os.Stat(path) + + if err == nil { + configs = append(configs, path) + } + + if dir == "" || dir == "." { + break + } + + dir = filepath.Dir(dir) + + if err == nil || os.IsNotExist(err) { + continue + } + + t.Fatalf("Error while reading %s: %s", path, err) + } + + slices.Reverse(configs) + return configs +} + +// LoadConfig loads the config file. Non-leaf configs are cached. 
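+// For example (directory layout illustrative): a test.toml higher up that sets Cloud = false merges with a +// leaf test.toml that only adds [[Repls]] entries; the boolean carries through while the replacement slices +// are appended rather than replaced (mergo.WithOverride plus mergo.WithAppendSlice below).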
+func LoadConfig(t *testing.T, dir string) (TestConfig, string) { + configs := FindConfigs(t, dir) + + if len(configs) == 0 { + return TestConfig{}, "(no config)" + } + + result := DoLoadConfig(t, configs[0]) + + for _, cfgName := range configs[1:] { + cfg := DoLoadConfig(t, cfgName) + err := mergo.Merge(&result, cfg, mergo.WithOverride, mergo.WithoutDereference, mergo.WithAppendSlice) + if err != nil { + t.Fatalf("Error during config merge: %s: %s", cfgName, err) + } + } + + return result, strings.Join(configs, ", ") +} + +func DoLoadConfig(t *testing.T, path string) TestConfig { + bytes, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read config: %s", err) + } + + var config TestConfig + meta, err := toml.Decode(string(bytes), &config) + require.NoError(t, err) + + keys := meta.Undecoded() + if len(keys) > 0 { + t.Fatalf("Undecoded keys in %s: %#v", path, keys) + } + + return config +} diff --git a/acceptance/help/output.txt b/acceptance/help/output.txt index ed4a88ce6..18434251d 100644 --- a/acceptance/help/output.txt +++ b/acceptance/help/output.txt @@ -115,7 +115,6 @@ Marketplace Apps apps Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on. - apps Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on. Clean Rooms clean-room-assets Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room. diff --git a/acceptance/install_terraform.py b/acceptance/install_terraform.py new file mode 100755 index 000000000..4cf6a9729 --- /dev/null +++ b/acceptance/install_terraform.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +""" +Script to set up terraform and databricks terraform provider in a local directory: + +- Download terraform. +- Download databricks provider. +- Write a .terraformrc config file that uses this directory. +- The config file contains env vars that need to be set so that databricks CLI uses this terraform and provider. +""" + +import os +import platform +import zipfile +import argparse +import json +from pathlib import Path +from urllib.request import urlretrieve + +os_name = platform.system().lower() + +arch = platform.machine().lower() +arch = {"x86_64": "amd64"}.get(arch, arch) +if os_name == "windows" and arch not in ("386", "amd64"): + # terraform 1.5.5 only has builds for these two. 
+ arch = "amd64" + +terraform_version = "1.5.5" +terraform_file = f"terraform_{terraform_version}_{os_name}_{arch}.zip" +terraform_url = f"https://releases.hashicorp.com/terraform/{terraform_version}/{terraform_file}" +terraform_binary = "terraform.exe" if os_name == "windows" else "terraform" + + +def retrieve(url, path): + if not path.exists(): + print(f"Downloading {url} -> {path}") + urlretrieve(url, path) + + +def read_version(path): + for line in path.open(): + if "ProviderVersion" in line: + # Expecting 'const ProviderVersion = "1.64.1"' + items = line.strip().split() + assert len(items) >= 3, items + assert items[-3:-1] == ["ProviderVersion", "="], items + version = items[-1].strip('"') + assert version, items + return version + raise SystemExit(f"Could not find ProviderVersion in {path}") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--targetdir", default="build", type=Path) + parser.add_argument("--provider-version") + args = parser.parse_args() + target = args.targetdir + + if not args.provider_version: + version_file = Path(__file__).parent.parent / "bundle/internal/tf/codegen/schema/version.go" + assert version_file.exists(), version_file + terraform_provider_version = read_version(version_file) + print(f"Read version {terraform_provider_version} from {version_file}") + else: + terraform_provider_version = args.provider_version + + terraform_provider_file = f"terraform-provider-databricks_{terraform_provider_version}_{os_name}_{arch}.zip" + terraform_provider_url = ( + f"https://github.com/databricks/terraform-provider-databricks/releases/download/v{terraform_provider_version}/{terraform_provider_file}" + ) + + target.mkdir(exist_ok=True, parents=True) + + zip_path = target / terraform_file + terraform_path = target / terraform_binary + terraform_provider_path = target / terraform_provider_file + + retrieve(terraform_url, zip_path) + retrieve(terraform_provider_url, terraform_provider_path) + + if not terraform_path.exists(): + print(f"Extracting {zip_path} -> {terraform_path}") + + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(target) + + terraform_path.chmod(0o755) + + tfplugins_path = target / "tfplugins" + provider_dir = Path(tfplugins_path / f"registry.terraform.io/databricks/databricks/{terraform_provider_version}/{os_name}_{arch}") + if not provider_dir.exists(): + print(f"Extracting {terraform_provider_path} -> {provider_dir}") + os.makedirs(provider_dir, exist_ok=True) + with zipfile.ZipFile(terraform_provider_path, "r") as zip_ref: + zip_ref.extractall(provider_dir) + + files = list(provider_dir.iterdir()) + assert files, provider_dir + + for f in files: + f.chmod(0o755) + + terraformrc_path = target / ".terraformrc" + if not terraformrc_path.exists(): + path = json.dumps(str(tfplugins_path.absolute())) + text = f"""# Set these env variables before running databricks cli: +# export DATABRICKS_TF_CLI_CONFIG_FILE={terraformrc_path.absolute()} +# export DATABRICKS_TF_EXEC_PATH={terraform_path.absolute()} + +provider_installation {{ + filesystem_mirror {{ + path = {path} + include = ["registry.terraform.io/databricks/databricks"] + }} +}} +""" + print(f"Writing {terraformrc_path}:\n{text}") + terraformrc_path.write_text(text) + + +if __name__ == "__main__": + main() diff --git a/acceptance/panic/output.txt b/acceptance/panic/output.txt new file mode 100644 index 000000000..9dca41c23 --- /dev/null +++ b/acceptance/panic/output.txt @@ -0,0 +1,15 @@ + +>>> [CLI] selftest panic +The Databricks CLI unexpectedly had a fatal 
error. +Please report this issue to Databricks in the form of a GitHub issue at: +https://github.com/databricks/cli + +CLI Version: [DEV_VERSION] + +Panic Payload: the databricks selftest panic command always panics + +Stack Trace: +goroutine 1 [running]: +runtime/debug.Stack() + +Exit code: 1 diff --git a/acceptance/panic/script b/acceptance/panic/script new file mode 100644 index 000000000..a02466923 --- /dev/null +++ b/acceptance/panic/script @@ -0,0 +1,5 @@ +# We filter anything after runtime/debug.Stack() in the output because the stack +# trace itself is hard to perform replacements on, since it can depend upon the +# exact setup of where the modules are installed in your Go setup, memory addresses +# at runtime etc. +trace $CLI selftest panic 2>&1 | sed '/runtime\/debug\.Stack()/q' diff --git a/acceptance/panic/test.toml b/acceptance/panic/test.toml new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/script.prepare b/acceptance/script.prepare index 3f1bb2acc..ca47cdbff 100644 --- a/acceptance/script.prepare +++ b/acceptance/script.prepare @@ -1,6 +1,3 @@ -# Prevent CLI from downloading terraform in each test: -export DATABRICKS_TF_EXEC_PATH=/tmp/ - errcode() { # Temporarily disable 'set -e' to prevent the script from exiting on error set +e @@ -9,7 +6,9 @@ errcode() { local exit_code=$? # Re-enable 'set -e' if it was previously set set -e - >&2 printf "\nExit code: $exit_code\n" + if [ $exit_code -ne 0 ]; then + >&2 printf "\nExit code: $exit_code\n" + fi } trace() { @@ -34,3 +33,29 @@ trace() { return $? } + +git-repo-init() { + git init -qb main + git config core.autocrlf false + git config user.name "Tester" + git config user.email "tester@databricks.com" + git config core.hooksPath no-hooks + git add databricks.yml + git commit -qm 'Add databricks.yml' +} + +title() { + local label="$1" + printf "\n=== %s" "$label" +} + +withdir() { + local dir="$1" + shift + local orig_dir="$(pwd)" + cd "$dir" || return $? + "$@" + local exit_code=$? + cd "$orig_dir" || return $? 
+ return $exit_code +} diff --git a/acceptance/selftest/basic/out.hello.txt b/acceptance/selftest/basic/out.hello.txt new file mode 100644 index 000000000..e427984d4 --- /dev/null +++ b/acceptance/selftest/basic/out.hello.txt @@ -0,0 +1 @@ +HELLO diff --git a/acceptance/selftest/basic/output.txt b/acceptance/selftest/basic/output.txt new file mode 100644 index 000000000..cadbdebb5 --- /dev/null +++ b/acceptance/selftest/basic/output.txt @@ -0,0 +1,39 @@ +=== Capturing STDERR +>>> python3 -c import sys; sys.stderr.write("STDERR\n") +STDERR + +=== Capturing STDOUT +>>> python3 -c import sys; sys.stderr.write("STDOUT\n") +STDOUT + +=== Capturing exit code +>>> errcode python3 -c raise SystemExit(5) + +Exit code: 5 + +=== Capturing exit code (alt) +>>> python3 -c raise SystemExit(7) + +Exit code: 7 + +=== Capturing pwd +>>> python3 -c import os; print(os.getcwd()) +[TMPDIR] + +=== Capturing subdir +>>> mkdir -p subdir/a/b/c + +>>> withdir subdir/a/b/c python3 -c import os; print(os.getcwd()) +[TMPDIR]/subdir/a/b/c + +=== Custom output files - everything starting with out is captured and compared +>>> echo HELLO + +=== Custom regex can be specified in [[Repl]] section +1234 +CUSTOM_NUMBER_REGEX +123456 + +=== Testing --version +>>> [CLI] --version +Databricks CLI v[DEV_VERSION] diff --git a/acceptance/selftest/basic/script b/acceptance/selftest/basic/script new file mode 100644 index 000000000..bccf30e71 --- /dev/null +++ b/acceptance/selftest/basic/script @@ -0,0 +1,29 @@ +printf "=== Capturing STDERR" +trace python3 -c 'import sys; sys.stderr.write("STDERR\n")' + +printf "\n=== Capturing STDOUT" +trace python3 -c 'import sys; sys.stderr.write("STDOUT\n")' + +printf "\n=== Capturing exit code" +trace errcode python3 -c 'raise SystemExit(5)' + +printf "\n=== Capturing exit code (alt)" +errcode trace python3 -c 'raise SystemExit(7)' + +printf "\n=== Capturing pwd" +trace python3 -c 'import os; print(os.getcwd())' + +printf "\n=== Capturing subdir" +trace mkdir -p subdir/a/b/c +trace withdir subdir/a/b/c python3 -c 'import os; print(os.getcwd())' + +printf "\n=== Custom output files - everything starting with out is captured and compared" +trace echo HELLO > out.hello.txt + +printf "\n=== Custom regex can be specified in [[Repl]] section\n" +echo 1234 +echo 12345 +echo 123456 + +printf "\n=== Testing --version" +trace $CLI --version diff --git a/acceptance/selftest/basic/test.toml b/acceptance/selftest/basic/test.toml new file mode 100644 index 000000000..762e28ceb --- /dev/null +++ b/acceptance/selftest/basic/test.toml @@ -0,0 +1,20 @@ +# Badness = "Brief description of what's wrong with the test output, if anything" + +#[GOOS] +# Disable on Windows +#windows = false + +# Disable on Mac +#mac = false + +# Disable on Linux +#linux = false + +[[Repls]] +Old = '\b[0-9]{5}\b' +New = "CUSTOM_NUMBER_REGEX" + +[[Repls]] +# Fix path with reverse slashes in the output for Windows. +Old = 'TMPDIR]\\subdir\\a\\b\\c' +New = 'TMPDIR]/subdir/a/b/c' diff --git a/acceptance/selftest/diff/out_dir_a/output.txt b/acceptance/selftest/diff/out_dir_a/output.txt new file mode 100644 index 000000000..303c1867b --- /dev/null +++ b/acceptance/selftest/diff/out_dir_a/output.txt @@ -0,0 +1,7 @@ +Hello! 
+{ + "id": "[USERID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/out_dir_b/output.txt b/acceptance/selftest/diff/out_dir_b/output.txt new file mode 100644 index 000000000..f4f01af13 --- /dev/null +++ b/acceptance/selftest/diff/out_dir_b/output.txt @@ -0,0 +1,7 @@ +Hello! +{ + "id": "[UUID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/output.txt b/acceptance/selftest/diff/output.txt new file mode 100644 index 000000000..aef99f1e3 --- /dev/null +++ b/acceptance/selftest/diff/output.txt @@ -0,0 +1,13 @@ + +>>> diff.py out_dir_a out_dir_b +Only in out_dir_a: only_in_a +Only in out_dir_b: only_in_b +--- out_dir_a/output.txt ++++ out_dir_b/output.txt +@@ -1,5 +1,5 @@ + Hello! + { +- "id": "[USERID]", ++ "id": "[UUID]", + "userName": "[USERNAME]" + } diff --git a/acceptance/selftest/diff/script b/acceptance/selftest/diff/script new file mode 100644 index 000000000..a7b8706e6 --- /dev/null +++ b/acceptance/selftest/diff/script @@ -0,0 +1,17 @@ +mkdir out_dir_a +mkdir out_dir_b + +touch out_dir_a/only_in_a +touch out_dir_b/only_in_b + +echo Hello! >> out_dir_a/output.txt +echo Hello! >> out_dir_b/output.txt + +curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt +printf "\n\nFooter" >> out_dir_a/output.txt +printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt + +# Unlike regular diff, diff.py will apply replacements first before doing the comparison +errcode trace diff.py out_dir_a out_dir_b + +rm out_dir_a/only_in_a out_dir_b/only_in_b diff --git a/acceptance/selftest/server/out.requests.txt b/acceptance/selftest/server/out.requests.txt new file mode 100644 index 000000000..34f4c4899 --- /dev/null +++ b/acceptance/selftest/server/out.requests.txt @@ -0,0 +1,12 @@ +{ + "method": "GET", + "path": "/api/2.0/preview/scim/v2/Me" +} +{ + "method": "GET", + "path": "/custom/endpoint" +} +{ + "method": "GET", + "path": "/api/2.0/workspace/get-status" +} diff --git a/acceptance/selftest/server/output.txt b/acceptance/selftest/server/output.txt new file mode 100644 index 000000000..7147f9c9b --- /dev/null +++ b/acceptance/selftest/server/output.txt @@ -0,0 +1,21 @@ + +>>> curl -s [DATABRICKS_URL]/api/2.0/preview/scim/v2/Me +{ + "id": "[USERID]", + "userName": "[USERNAME]" +} +>>> curl -sD - [DATABRICKS_URL]/custom/endpoint?query=param +HTTP/1.1 201 Created +X-Custom-Header: hello +Date: (redacted) +Content-Length: (redacted) +Content-Type: text/plain; charset=utf-8 + +custom +--- +response + +>>> errcode [CLI] workspace get-status /a/b/c +Error: Workspace path not found + +Exit code: 1 diff --git a/acceptance/selftest/server/script b/acceptance/selftest/server/script new file mode 100644 index 000000000..810ea64b6 --- /dev/null +++ b/acceptance/selftest/server/script @@ -0,0 +1,4 @@ +trace curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me +trace curl -sD - $DATABRICKS_HOST/custom/endpoint?query=param + +trace errcode $CLI workspace get-status /a/b/c diff --git a/acceptance/selftest/server/test.toml b/acceptance/selftest/server/test.toml new file mode 100644 index 000000000..8fc7b3cac --- /dev/null +++ b/acceptance/selftest/server/test.toml @@ -0,0 +1,23 @@ +RecordRequests = true + +[[Server]] +Pattern = "GET /custom/endpoint" +Response.Body = '''should not see this response, latter response takes precedence''' + +[[Server]] +Pattern = "GET /custom/endpoint" +Response.Body = '''custom +--- 
+response +''' +Response.StatusCode = 201 +[Server.Response.Headers] +"X-Custom-Header" = ["hello"] + +[[Repls]] +Old = 'Date: .*' +New = 'Date: (redacted)' + +[[Repls]] +Old = 'Content-Length: [0-9]*' +New = 'Content-Length: (redacted)' diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 7b21e198f..402e3ca5f 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -2,76 +2,32 @@ package acceptance_test import ( "encoding/json" - "net" + "fmt" "net/http" - "net/http/httptest" - "testing" + + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/databricks/databricks-sdk-go/service/jobs" + + "github.com/databricks/cli/libs/testserver" "github.com/databricks/databricks-sdk-go/service/workspace" ) -type TestServer struct { - *httptest.Server - Mux *http.ServeMux - Port int +var testUser = iam.User{ + Id: "1000012345", + UserName: "tester@databricks.com", } -type HandlerFunc func(r *http.Request) (any, error) - -func NewTestServer() *TestServer { - mux := http.NewServeMux() - server := httptest.NewServer(mux) - port := server.Listener.Addr().(*net.TCPAddr).Port - - return &TestServer{ - Server: server, - Mux: mux, - Port: port, - } +var testMetastore = catalog.MetastoreAssignment{ + DefaultCatalogName: "hive_metastore", + MetastoreId: "120efa64-9b68-46ba-be38-f319458430d2", + WorkspaceId: 470123456789500, } -func (s *TestServer) Handle(pattern string, handler HandlerFunc) { - s.Mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) { - resp, err := handler(r) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - - var respBytes []byte - - respString, ok := resp.(string) - if ok { - respBytes = []byte(respString) - } else { - respBytes, err = json.MarshalIndent(resp, "", " ") - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - } - - if _, err := w.Write(respBytes); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - }) -} - -func StartServer(t *testing.T) *TestServer { - server := NewTestServer() - t.Cleanup(func() { - server.Close() - }) - return server -} - -func AddHandlers(server *TestServer) { - server.Handle("/api/2.0/policies/clusters/list", func(r *http.Request) (any, error) { +func AddHandlers(server *testserver.Server) { + server.Handle("GET", "/api/2.0/policies/clusters/list", func(req testserver.Request) any { return compute.ListPoliciesResponse{ Policies: []compute.Policy{ { @@ -83,10 +39,10 @@ func AddHandlers(server *TestServer) { Name: "some-test-cluster-policy", }, }, - }, nil + } }) - server.Handle("/api/2.0/instance-pools/list", func(r *http.Request) (any, error) { + server.Handle("GET", "/api/2.0/instance-pools/list", func(req testserver.Request) any { return compute.ListInstancePools{ InstancePools: []compute.InstancePoolAndStats{ { @@ -94,10 +50,10 @@ func AddHandlers(server *TestServer) { InstancePoolId: "1234", }, }, - }, nil + } }) - server.Handle("/api/2.1/clusters/list", func(r *http.Request) (any, error) { + server.Handle("GET", "/api/2.1/clusters/list", func(req testserver.Request) any { return compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -109,21 +65,108 @@ func AddHandlers(server *TestServer) { ClusterId: "9876", }, }, - }, nil + } 
}) - server.Handle("/api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) { - return iam.User{ - UserName: "tester@databricks.com", - }, nil + server.Handle("GET", "/api/2.0/preview/scim/v2/Me", func(req testserver.Request) any { + return testserver.Response{ + Headers: map[string][]string{"X-Databricks-Org-Id": {"900800700600"}}, + Body: testUser, + } }) - server.Handle("/api/2.0/workspace/get-status", func(r *http.Request) (any, error) { - return workspace.ObjectInfo{ - ObjectId: 1001, + server.Handle("GET", "/api/2.0/workspace/get-status", func(req testserver.Request) any { + path := req.URL.Query().Get("path") + return req.Workspace.WorkspaceGetStatus(path) + }) + + server.Handle("POST", "/api/2.0/workspace/mkdirs", func(req testserver.Request) any { + var request workspace.Mkdirs + if err := json.Unmarshal(req.Body, &request); err != nil { + return testserver.Response{ + Body: fmt.Sprintf("internal error: %s", err), + StatusCode: http.StatusInternalServerError, + } + } + + req.Workspace.WorkspaceMkdirs(request) + return "" + }) + + server.Handle("GET", "/api/2.0/workspace/export", func(req testserver.Request) any { + path := req.URL.Query().Get("path") + return req.Workspace.WorkspaceExport(path) + }) + + server.Handle("POST", "/api/2.0/workspace/delete", func(req testserver.Request) any { + path := req.URL.Query().Get("path") + recursive := req.URL.Query().Get("recursive") == "true" + req.Workspace.WorkspaceDelete(path, recursive) + return "" + }) + + server.Handle("POST", "/api/2.0/workspace-files/import-file/{path:.*}", func(req testserver.Request) any { + path := req.Vars["path"] + req.Workspace.WorkspaceFilesImportFile(path, req.Body) + return "" + }) + + server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any { + return testMetastore + }) + + server.Handle("GET", "/api/2.0/permissions/directories/{objectId}", func(req testserver.Request) any { + objectId := req.Vars["objectId"] + return workspace.WorkspaceObjectPermissions{ + ObjectId: objectId, ObjectType: "DIRECTORY", - Path: "", - ResourceId: "1001", - }, nil + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "tester@databricks.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + { + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + }, + } + }) + + server.Handle("POST", "/api/2.1/jobs/create", func(req testserver.Request) any { + var request jobs.CreateJob + if err := json.Unmarshal(req.Body, &request); err != nil { + return testserver.Response{ + Body: fmt.Sprintf("internal error: %s", err), + StatusCode: 500, + } + } + + return req.Workspace.JobsCreate(request) + }) + + server.Handle("GET", "/api/2.1/jobs/get", func(req testserver.Request) any { + jobId := req.URL.Query().Get("job_id") + return req.Workspace.JobsGet(jobId) + }) + + server.Handle("GET", "/api/2.1/jobs/list", func(req testserver.Request) any { + return req.Workspace.JobsList() + }) + + server.Handle("GET", "/oidc/.well-known/oauth-authorization-server", func(_ testserver.Request) any { + return map[string]string{ + "authorization_endpoint": server.URL + "oidc/v1/authorize", + "token_endpoint": server.URL + "/oidc/v1/token", + } + }) + + server.Handle("POST", "/oidc/v1/token", func(_ testserver.Request) any { + return map[string]string{ + "access_token": "oauth-token", + "expires_in": "3600", + "scope": "all-apis", + "token_type": "Bearer", + } }) } diff --git a/acceptance/terraform/main.tf b/acceptance/terraform/main.tf new file mode 100644 index 
000000000..674b41a3c --- /dev/null +++ b/acceptance/terraform/main.tf @@ -0,0 +1,25 @@ +terraform { + required_providers { + databricks = { + source = "databricks/databricks" + version = "1.65.1" + } + } + + required_version = "= 1.5.5" +} + +provider "databricks" { + # Optionally, specify the Databricks host and token + # host = "https://" + # token = "" +} + +data "databricks_current_user" "me" { + # Retrieves the current user's information +} + +output "username" { + description = "Username" + value = "${data.databricks_current_user.me.user_name}" +} diff --git a/acceptance/terraform/output.txt b/acceptance/terraform/output.txt new file mode 100644 index 000000000..851785827 --- /dev/null +++ b/acceptance/terraform/output.txt @@ -0,0 +1,51 @@ + +>>> [TERRAFORM] init -no-color -get=false + +Initializing the backend... + +Initializing provider plugins... +- Finding databricks/databricks versions matching "1.65.1"... +- Installing databricks/databricks v1.65.1... +- Installed databricks/databricks v1.65.1 (unauthenticated) + +Terraform has created a lock file .terraform.lock.hcl to record the provider +selections it made above. Include this file in your version control repository +so that Terraform can guarantee to make the same selections by default when +you run "terraform init" in the future. + + +Warning: Incomplete lock file information for providers + +Due to your customized provider installation methods, Terraform was forced to +calculate lock file checksums locally for the following providers: + - databricks/databricks + + +To calculate additional checksums for another platform, run: + terraform providers lock -platform=linux_amd64 +(where linux_amd64 is the platform to generate) + +Terraform has been successfully initialized! + +You may now begin working with Terraform. Try running "terraform plan" to see +any changes that are required for your infrastructure. All Terraform commands +should now work. + +If you ever set or change modules or backend configuration for Terraform, +rerun this command to reinitialize your working directory. If you forget, other +commands will detect it and remind you to do so if necessary. + +>>> [TERRAFORM] plan -no-color +data.databricks_current_user.me: Reading... +data.databricks_current_user.me: Read complete after (redacted) [id=[USERID]] + +Changes to Outputs: + + username = "[USERNAME]" + +You can apply this plan to save these new output values to the Terraform +state, without changing any real infrastructure. + +───────────────────────────────────────────────────────────────────────────── + +Note: You didn't use the -out option to save this plan, so Terraform can't +guarantee to take exactly these actions if you run "terraform apply" now. diff --git a/acceptance/terraform/script b/acceptance/terraform/script new file mode 100644 index 000000000..78e35049d --- /dev/null +++ b/acceptance/terraform/script @@ -0,0 +1,14 @@ +# Want to filter out these message: +# Mac: +# The current .terraform.lock.hcl file only includes checksums for +# darwin_arm64, so Terraform running on another platform will fail to install +# these providers. +# +# Linux: +# The current .terraform.lock.hcl file only includes checksums for linux_amd64, +# so Terraform running on another platform will fail to install these +# providers. + +trace $TERRAFORM init -no-color -get=false | grep -v 'includes checksums for' | grep -v 'so Terraform running on another' | grep -v 'providers\.' 
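Aside: the script above strips the platform-dependent lock-file warning with grep; the remaining nondeterminism (timings, content lengths, IDs) is handled by the [[Repls]] regex substitutions declared in the test.toml files added in this change, which the acceptance runner applies to captured output before comparing it against the golden output.txt (the selftest/diff script makes the same point about diff.py). A minimal Go sketch of such a normalization pass, assuming it is nothing more than an ordered list of regex rewrites; the runner itself is not part of this diff:

```go
// Minimal sketch, not the actual acceptance runner: apply Old/New regex
// replacements (mirroring [[Repls]] entries in test.toml) to captured output
// before it is compared against the golden output.txt.
package main

import (
	"fmt"
	"regexp"
)

type repl struct {
	pattern     *regexp.Regexp
	replacement string
}

func normalize(out string, repls []repl) string {
	for _, r := range repls {
		out = r.pattern.ReplaceAllString(out, r.replacement)
	}
	return out
}

func main() {
	repls := []repl{
		{regexp.MustCompile(`Read complete after [^\s]+`), "Read complete after (redacted)"},
		{regexp.MustCompile(`Content-Length: [0-9]*`), "Content-Length: (redacted)"},
	}
	line := "data.databricks_current_user.me: Read complete after 1s"
	fmt.Println(normalize(line, repls))
	// prints: data.databricks_current_user.me: Read complete after (redacted)
}
```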
+trace $TERRAFORM plan -no-color +rm -fr .terraform.lock.hcl .terraform diff --git a/acceptance/terraform/test.toml b/acceptance/terraform/test.toml new file mode 100644 index 000000000..9fbd70943 --- /dev/null +++ b/acceptance/terraform/test.toml @@ -0,0 +1,6 @@ +Local = true +Cloud = true + +[[Repls]] +Old = 'Read complete after [^\s]+' +New = 'Read complete after (redacted)' diff --git a/acceptance/test.toml b/acceptance/test.toml new file mode 100644 index 000000000..0a009f397 --- /dev/null +++ b/acceptance/test.toml @@ -0,0 +1,3 @@ +# Default settings that apply to all tests unless overriden by test.toml files in inner directories. +Local = true +Cloud = false diff --git a/acceptance/workspace/jobs/create-error/out.requests.txt b/acceptance/workspace/jobs/create-error/out.requests.txt new file mode 100644 index 000000000..30f104fd1 --- /dev/null +++ b/acceptance/workspace/jobs/create-error/out.requests.txt @@ -0,0 +1,7 @@ +{ + "method": "POST", + "path": "/api/2.1/jobs/create", + "body": { + "name": "abc" + } +} diff --git a/acceptance/workspace/jobs/create-error/output.txt b/acceptance/workspace/jobs/create-error/output.txt new file mode 100644 index 000000000..0e69eeb4b --- /dev/null +++ b/acceptance/workspace/jobs/create-error/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] jobs create --json {"name":"abc"} +Error: Invalid access token. + +Exit code: 1 diff --git a/acceptance/workspace/jobs/create-error/script b/acceptance/workspace/jobs/create-error/script new file mode 100644 index 000000000..9ff7b5b87 --- /dev/null +++ b/acceptance/workspace/jobs/create-error/script @@ -0,0 +1 @@ +trace $CLI jobs create --json '{"name":"abc"}' diff --git a/acceptance/workspace/jobs/create-error/test.toml b/acceptance/workspace/jobs/create-error/test.toml new file mode 100644 index 000000000..a7b86accb --- /dev/null +++ b/acceptance/workspace/jobs/create-error/test.toml @@ -0,0 +1,11 @@ +RecordRequests = true + +[[Server]] +Pattern = "POST /api/2.1/jobs/create" +Response.Body = ''' +{ + "error_code": "PERMISSION_DENIED", + "message": "Invalid access token." 
+} +''' +Response.StatusCode = 403 diff --git a/acceptance/workspace/jobs/create/out.requests.txt b/acceptance/workspace/jobs/create/out.requests.txt new file mode 100644 index 000000000..1d200a547 --- /dev/null +++ b/acceptance/workspace/jobs/create/out.requests.txt @@ -0,0 +1,15 @@ +{ + "headers": { + "Authorization": [ + "Bearer [DATABRICKS_TOKEN]" + ], + "User-Agent": [ + "cli/[DEV_VERSION] databricks-sdk-go/[SDK_VERSION] go/[GO_VERSION] os/[OS] cmd/jobs_create cmd-exec-id/[UUID] auth/pat" + ] + }, + "method": "POST", + "path": "/api/2.1/jobs/create", + "body": { + "name": "abc" + } +} diff --git a/acceptance/workspace/jobs/create/output.txt b/acceptance/workspace/jobs/create/output.txt new file mode 100644 index 000000000..50b823aa0 --- /dev/null +++ b/acceptance/workspace/jobs/create/output.txt @@ -0,0 +1,5 @@ + +>>> [CLI] jobs create --json {"name":"abc"} +{ + "job_id":1111 +} diff --git a/acceptance/workspace/jobs/create/script b/acceptance/workspace/jobs/create/script new file mode 100644 index 000000000..9ff7b5b87 --- /dev/null +++ b/acceptance/workspace/jobs/create/script @@ -0,0 +1 @@ +trace $CLI jobs create --json '{"name":"abc"}' diff --git a/acceptance/workspace/jobs/create/test.toml b/acceptance/workspace/jobs/create/test.toml new file mode 100644 index 000000000..f08bc0e63 --- /dev/null +++ b/acceptance/workspace/jobs/create/test.toml @@ -0,0 +1,26 @@ +RecordRequests = true +IncludeRequestHeaders = ["Authorization", "User-Agent"] + +[[Server]] +Pattern = "POST /api/2.1/jobs/create" +Response.Body = ''' +{ + "job_id": 1111 +} +''' + +[[Repls]] +Old = "(linux|darwin|windows)" +New = "[OS]" + +[[Repls]] +Old = " upstream/[A-Za-z0-9.-]+" +New = "" + +[[Repls]] +Old = " upstream-version/[A-Za-z0-9.-]+" +New = "" + +[[Repls]] +Old = " cicd/[A-Za-z0-9.-]+" +New = "" diff --git a/bundle/apps/interpolate_variables_test.go b/bundle/apps/interpolate_variables_test.go index a2909006f..b6c424a95 100644 --- a/bundle/apps/interpolate_variables_test.go +++ b/bundle/apps/interpolate_variables_test.go @@ -44,6 +44,6 @@ func TestAppInterpolateVariables(t *testing.T) { diags := bundle.Apply(context.Background(), b, InterpolateVariables()) require.Empty(t, diags) - require.Equal(t, []any([]any{map[string]any{"name": "JOB_ID", "value": "123"}}), b.Config.Resources.Apps["my_app_1"].Config["env"]) + require.Equal(t, []any{map[string]any{"name": "JOB_ID", "value": "123"}}, b.Config.Resources.Apps["my_app_1"].Config["env"]) require.Nil(t, b.Config.Resources.Apps["my_app_2"].Config) } diff --git a/bundle/apps/upload_config_test.go b/bundle/apps/upload_config_test.go index a1a6b3afb..1087508f2 100644 --- a/bundle/apps/upload_config_test.go +++ b/bundle/apps/upload_config_test.go @@ -70,6 +70,6 @@ env: bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(root, "databricks.yml")}}) - diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), &u)) + diags := bundle.ApplySeq(context.Background(), b, mutator.TranslatePaths(), &u) require.NoError(t, diags.Error()) } diff --git a/bundle/apps/validate_test.go b/bundle/apps/validate_test.go index 6c3a88191..11270198e 100644 --- a/bundle/apps/validate_test.go +++ b/bundle/apps/validate_test.go @@ -51,7 +51,7 @@ func TestAppsValidate(t *testing.T) { bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate())) + diags := bundle.ApplySeq(context.Background(), b, mutator.TranslatePaths(), 
Validate()) require.Len(t, diags, 1) require.Equal(t, "app.yml detected", diags[0].Summary) require.Contains(t, diags[0].Detail, "app.yml and use 'config' property for app resource") @@ -90,7 +90,7 @@ func TestAppsValidateSameSourcePath(t *testing.T) { bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) - diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate())) + diags := bundle.ApplySeq(context.Background(), b, mutator.TranslatePaths(), Validate()) require.Len(t, diags, 1) require.Equal(t, "Duplicate app source code path", diags[0].Summary) require.Contains(t, diags[0].Detail, "has the same source code path as app resource") diff --git a/bundle/artifacts/all.go b/bundle/artifacts/all.go index 768ccdfe3..b78e7c100 100644 --- a/bundle/artifacts/all.go +++ b/bundle/artifacts/all.go @@ -38,5 +38,5 @@ func (m *all) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { } } - return bundle.Apply(ctx, b, bundle.Seq(out...)) + return bundle.ApplySeq(ctx, b, out...) } diff --git a/bundle/artifacts/autodetect.go b/bundle/artifacts/autodetect.go deleted file mode 100644 index c8d235616..000000000 --- a/bundle/artifacts/autodetect.go +++ /dev/null @@ -1,32 +0,0 @@ -package artifacts - -import ( - "context" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/whl" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" -) - -func DetectPackages() bundle.Mutator { - return &autodetect{} -} - -type autodetect struct{} - -func (m *autodetect) Name() string { - return "artifacts.DetectPackages" -} - -func (m *autodetect) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - // If artifacts section explicitly defined, do not try to auto detect packages - if b.Config.Artifacts != nil { - log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") - return nil - } - - return bundle.Apply(ctx, b, bundle.Seq( - whl.DetectPackage(), - )) -} diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index 0446135b6..94880bc2c 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -53,5 +53,5 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // if we do it before, any files that are generated by build command will // not be included into artifact.Files and thus will not be uploaded. mutators = append(mutators, &expandGlobs{name: m.name}) - return bundle.Apply(ctx, b, bundle.Seq(mutators...)) + return bundle.ApplySeq(ctx, b, mutators...) } diff --git a/bundle/artifacts/expand_globs_test.go b/bundle/artifacts/expand_globs_test.go index 264c52c50..7bf886330 100644 --- a/bundle/artifacts/expand_globs_test.go +++ b/bundle/artifacts/expand_globs_test.go @@ -39,11 +39,11 @@ func TestExpandGlobs_Nominal(t *testing.T) { bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() - diags := bundle.Apply(ctx, b, bundle.Seq( + diags := bundle.ApplySeq(ctx, b, // Run prepare first to make paths absolute. &prepare{"test"}, &expandGlobs{"test"}, - )) + ) require.NoError(t, diags.Error()) // Assert that the expanded paths are correct. 
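Aside: this is the recurring mechanical change in this diff: bundle.Apply(ctx, b, bundle.Seq(ms...)) becomes bundle.ApplySeq(ctx, b, ms...). The helper itself is not shown in this excerpt; a plausible sketch, assuming it keeps Seq's behavior of applying mutators in order and stopping at the first error:

```go
// Plausible sketch of the new helper as it might appear in the bundle package.
// Assumption: it preserves Seq's behavior of stopping at the first error.
package bundle

import (
	"context"

	"github.com/databricks/cli/libs/diag"
)

func ApplySeq(ctx context.Context, b *Bundle, mutators ...Mutator) diag.Diagnostics {
	var diags diag.Diagnostics
	for _, m := range mutators {
		diags = diags.Extend(Apply(ctx, b, m))
		if diags.HasError() {
			break
		}
	}
	return diags
}
```

The variadic form avoids wrapping the mutators in an intermediate Seq mutator while keeping call sites equally readable, which is why the change is purely mechanical across the test files below.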
@@ -80,11 +80,11 @@ func TestExpandGlobs_InvalidPattern(t *testing.T) { bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() - diags := bundle.Apply(ctx, b, bundle.Seq( + diags := bundle.ApplySeq(ctx, b, // Run prepare first to make paths absolute. &prepare{"test"}, &expandGlobs{"test"}, - )) + ) assert.Len(t, diags, 4) assert.Equal(t, filepath.Clean("a[.txt")+": syntax error in pattern", diags[0].Summary) @@ -128,11 +128,11 @@ func TestExpandGlobs_NoMatches(t *testing.T) { bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() - diags := bundle.Apply(ctx, b, bundle.Seq( + diags := bundle.ApplySeq(ctx, b, // Run prepare first to make paths absolute. &prepare{"test"}, &expandGlobs{"test"}, - )) + ) assert.Len(t, diags, 2) assert.Equal(t, "c*.txt: no matching files", diags[0].Summary) diff --git a/bundle/artifacts/upload.go b/bundle/artifacts/upload.go index c69939e8c..d4625d85d 100644 --- a/bundle/artifacts/upload.go +++ b/bundle/artifacts/upload.go @@ -29,7 +29,7 @@ func (m *cleanUp) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics // We intentionally ignore the error because it is not critical to the deployment err := client.Delete(ctx, ".", filer.DeleteRecursively) if err != nil { - log.Errorf(ctx, "failed to delete %s: %v", uploadPath, err) + log.Debugf(ctx, "failed to delete %s: %v", uploadPath, err) } err = client.Mkdir(ctx, ".") diff --git a/bundle/artifacts/whl/autodetect.go b/bundle/artifacts/whl/autodetect.go index 202ea12bc..9eead83b7 100644 --- a/bundle/artifacts/whl/autodetect.go +++ b/bundle/artifacts/whl/autodetect.go @@ -2,11 +2,8 @@ package whl import ( "context" - "fmt" "os" "path/filepath" - "regexp" - "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -26,11 +23,17 @@ func (m *detectPkg) Name() string { } func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if b.Config.Artifacts != nil { + log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") + return nil + } + tasks := libraries.FindTasksWithLocalLibraries(b) if len(tasks) == 0 { log.Infof(ctx, "No local tasks in databricks.yml config, skipping auto detect") return nil } + log.Infof(ctx, "Detecting Python wheel project...") // checking if there is setup.py in the bundle root @@ -42,39 +45,18 @@ func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Found Python wheel project at %s", b.BundleRootPath) - module := extractModuleName(setupPy) - - if b.Config.Artifacts == nil { - b.Config.Artifacts = make(map[string]*config.Artifact) - } pkgPath, err := filepath.Abs(b.BundleRootPath) if err != nil { return diag.FromErr(err) } - b.Config.Artifacts[module] = &config.Artifact{ + + b.Config.Artifacts = make(map[string]*config.Artifact) + b.Config.Artifacts["python_artifact"] = &config.Artifact{ Path: pkgPath, Type: config.ArtifactPythonWheel, + // BuildCommand will be set by bundle/artifacts/whl/infer.go to "python3 setup.py bdist_wheel" } return nil } - -func extractModuleName(setupPy string) string { - bytes, err := os.ReadFile(setupPy) - if err != nil { - return randomName() - } - - content := string(bytes) - r := regexp.MustCompile(`name=['"](.*)['"]`) - matches := r.FindStringSubmatch(content) - if len(matches) == 0 { - return randomName() - } - return matches[1] -} - -func randomName() string { - return fmt.Sprintf("artifact%d", 
time.Now().Unix()) -} diff --git a/bundle/artifacts/whl/autodetect_test.go b/bundle/artifacts/whl/autodetect_test.go deleted file mode 100644 index b53289b2a..000000000 --- a/bundle/artifacts/whl/autodetect_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package whl - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestExtractModuleName(t *testing.T) { - moduleName := extractModuleName("./testdata/setup.py") - assert.Equal(t, "my_test_code", moduleName) -} - -func TestExtractModuleNameMinimal(t *testing.T) { - moduleName := extractModuleName("./testdata/setup_minimal.py") - assert.Equal(t, "my_test_code", moduleName) -} - -func TestExtractModuleNameIncorrect(t *testing.T) { - moduleName := extractModuleName("./testdata/setup_incorrect.py") - assert.Contains(t, moduleName, "artifact") -} diff --git a/bundle/bundle.go b/bundle/bundle.go index 3bf4ffb62..9cb8916f5 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/locker" "github.com/databricks/cli/libs/log" @@ -24,7 +25,6 @@ import ( "github.com/databricks/cli/libs/terraform" "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" - sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/hashicorp/terraform-exec/tfexec" ) @@ -72,6 +72,7 @@ type Bundle struct { // It can be initialized on demand after loading the configuration. clientOnce sync.Once client *databricks.WorkspaceClient + clientErr error // Files that are synced to the workspace.file_path Files []fileset.File @@ -134,23 +135,25 @@ func TryLoad(ctx context.Context) (*Bundle, error) { return Load(ctx, root) } -func (b *Bundle) InitializeWorkspaceClient() (*databricks.WorkspaceClient, error) { - client, err := b.Config.Workspace.Client() - if err != nil { - return nil, fmt.Errorf("cannot resolve bundle auth configuration: %w", err) - } - return client, nil +func (b *Bundle) WorkspaceClientE() (*databricks.WorkspaceClient, error) { + b.clientOnce.Do(func() { + var err error + b.client, err = b.Config.Workspace.Client() + if err != nil { + b.clientErr = fmt.Errorf("cannot resolve bundle auth configuration: %w", err) + } + }) + + return b.client, b.clientErr } func (b *Bundle) WorkspaceClient() *databricks.WorkspaceClient { - b.clientOnce.Do(func() { - var err error - b.client, err = b.InitializeWorkspaceClient() - if err != nil { - panic(err) - } - }) - return b.client + client, err := b.WorkspaceClientE() + if err != nil { + panic(err) + } + + return client } // SetWorkpaceClient sets the workspace client for this bundle. @@ -242,21 +245,5 @@ func (b *Bundle) AuthEnv() (map[string]string, error) { } cfg := b.client.Config - out := make(map[string]string) - for _, attr := range sdkconfig.ConfigAttributes { - // Ignore profile so that downstream tools don't try and reload - // the profile even though we know the current configuration is valid. 
- if attr.Name == "profile" { - continue - } - if len(attr.EnvVars) == 0 { - continue - } - if attr.IsZero(cfg) { - continue - } - out[attr.EnvVars[0]] = attr.GetString(cfg) - } - - return out, nil + return auth.Env(cfg), nil } diff --git a/bundle/config/generate/job.go b/bundle/config/generate/job.go index 0cdcbf3ad..934eda2cf 100644 --- a/bundle/config/generate/job.go +++ b/bundle/config/generate/job.go @@ -13,7 +13,6 @@ var ( func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) { value := make(map[string]dyn.Value) - if job.Settings.Tasks != nil { tasks := make([]dyn.Value, 0) for _, task := range job.Settings.Tasks { diff --git a/bundle/config/git.go b/bundle/config/git.go index f9f2f83e5..4b89bc2d2 100644 --- a/bundle/config/git.go +++ b/bundle/config/git.go @@ -8,9 +8,6 @@ type Git struct { // Path to bundle root relative to the git repository root. BundleRootPath string `json:"bundle_root_path,omitempty" bundle:"readonly"` - // Inferred is set to true if the Git details were inferred and weren't set explicitly - Inferred bool `json:"inferred,omitempty" bundle:"readonly"` - // The actual branch according to Git (may be different from the configured branch) ActualBranch string `json:"actual_branch,omitempty" bundle:"readonly"` } diff --git a/bundle/config/loader/process_root_includes.go b/bundle/config/loader/process_root_includes.go index c608a3de6..1e1215b30 100644 --- a/bundle/config/loader/process_root_includes.go +++ b/bundle/config/loader/process_root_includes.go @@ -2,6 +2,7 @@ package loader import ( "context" + "fmt" "path/filepath" "slices" "strings" @@ -36,6 +37,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Maintain list of files in order of files being loaded. // This is stored in the bundle configuration for observability. var files []string + var diags diag.Diagnostics // For each glob, find all files to load. // Ordering of the list of globs is maintained in the output. @@ -60,7 +62,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Filter matches to ones we haven't seen yet. var includes []string - for _, match := range matches { + for i, match := range matches { rel, err := filepath.Rel(b.BundleRootPath, match) if err != nil { return diag.FromErr(err) @@ -69,9 +71,22 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. continue } seen[rel] = true + if filepath.Ext(rel) != ".yaml" && filepath.Ext(rel) != ".yml" && filepath.Ext(rel) != ".json" { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: "Files in the 'include' configuration section must be YAML or JSON files.", + Detail: fmt.Sprintf("The file %s in the 'include' configuration section is not a YAML or JSON file, and only such files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead.", rel), + Locations: b.Config.GetLocations(fmt.Sprintf("include[%d]", i)), + }) + continue + } includes = append(includes, rel) } + if len(diags) > 0 { + return diags + } + // Add matches to list of mutators to return. slices.Sort(includes) files = append(files, includes...) @@ -83,5 +98,5 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Swap out the original includes list with the expanded globs. b.Config.Include = files - return bundle.Apply(ctx, b, bundle.Seq(out...)) + return bundle.ApplySeq(ctx, b, out...) 
} diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index b402053e7..c8e7bf9e8 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -84,7 +84,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // Pipelines presets: Prefix, PipelinesDevelopment for key, p := range r.Pipelines { - if p.PipelineSpec == nil { + if p.CreatePipeline == nil { diags = diags.Extend(diag.Errorf("pipeline %s is not defined", key)) continue } diff --git a/bundle/config/mutator/apply_source_linked_deployment_preset.go b/bundle/config/mutator/apply_source_linked_deployment_preset.go index 839648301..570ca72cf 100644 --- a/bundle/config/mutator/apply_source_linked_deployment_preset.go +++ b/bundle/config/mutator/apply_source_linked_deployment_preset.go @@ -72,17 +72,18 @@ func (m *applySourceLinkedDeploymentPreset) Apply(ctx context.Context, b *bundle return diags } + // This mutator runs before workspace paths are defaulted so it's safe to check for the user-defined value if b.Config.Workspace.FilePath != "" && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { - path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("workspace"), dyn.Key("file_path")) - + path := dyn.NewPath(dyn.Key("workspace"), dyn.Key("file_path")) diags = diags.Append( diag.Diagnostic{ Severity: diag.Warning, Summary: "workspace.file_path setting will be ignored in source-linked deployment mode", + Detail: "In source-linked deployment files are not copied to the destination and resources use source files instead", Paths: []dyn.Path{ - path[2:], + path, }, - Locations: b.Config.GetLocations(path[2:].String()), + Locations: b.Config.GetLocations(path.String()), }, ) } diff --git a/bundle/config/mutator/capture_schema_dependency.go b/bundle/config/mutator/capture_schema_dependency.go new file mode 100644 index 000000000..2e17a8175 --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency.go @@ -0,0 +1,100 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" +) + +type captureSchemaDependency struct{} + +// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines +// or UC Volumes using the `${resources.schemas..name}` syntax. Using this +// syntax allows TF to capture the deploy time dependency this DLT pipeline or UC Volume +// has on the schema and deploy changes to the schema before deploying the pipeline or volume. +// +// This mutator translates any implicit schema references in DLT pipelines or UC Volumes +// to the explicit syntax. 
+func CaptureSchemaDependency() bundle.Mutator { + return &captureSchemaDependency{} +} + +func (m *captureSchemaDependency) Name() string { + return "CaptureSchemaDependency" +} + +func schemaNameRef(key string) string { + return fmt.Sprintf("${resources.schemas.%s.name}", key) +} + +func findSchema(b *bundle.Bundle, catalogName, schemaName string) (string, *resources.Schema) { + if catalogName == "" || schemaName == "" { + return "", nil + } + + for k, s := range b.Config.Resources.Schemas { + if s != nil && s.CreateSchema != nil && s.CatalogName == catalogName && s.Name == schemaName { + return k, s + } + } + return "", nil +} + +func resolveVolume(v *resources.Volume, b *bundle.Bundle) { + if v == nil || v.CreateVolumeRequestContent == nil { + return + } + schemaK, schema := findSchema(b, v.CatalogName, v.SchemaName) + if schema == nil { + return + } + + v.SchemaName = schemaNameRef(schemaK) +} + +func resolvePipelineSchema(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.CreatePipeline == nil { + return + } + if p.Schema == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Schema) + if schema == nil { + return + } + + p.Schema = schemaNameRef(schemaK) +} + +func resolvePipelineTarget(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.CreatePipeline == nil { + return + } + if p.Target == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Target) + if schema == nil { + return + } + p.Target = schemaNameRef(schemaK) +} + +func (m *captureSchemaDependency) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, p := range b.Config.Resources.Pipelines { + // "schema" and "target" have the same semantics in the DLT API but are mutually + // exclusive i.e. only one can be set at a time. If schema is set, the pipeline + // is in direct publishing mode and can write tables to multiple schemas + // (vs target which is limited to a single schema). 
+ resolvePipelineTarget(p, b) + resolvePipelineSchema(p, b) + } + for _, v := range b.Config.Resources.Volumes { + resolveVolume(v, b) + } + return nil +} diff --git a/bundle/config/mutator/capture_schema_dependency_test.go b/bundle/config/mutator/capture_schema_dependency_test.go new file mode 100644 index 000000000..16fa636ee --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency_test.go @@ -0,0 +1,277 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCaptureSchemaDependencyForVolume(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Volumes: map[string]*resources.Volume{ + "volume1": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "foobar", + }, + }, + "volume2": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog2", + SchemaName: "foobar", + }, + }, + "volume3": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "barfoo", + }, + }, + "volume4": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogX", + SchemaName: "foobar", + }, + }, + "volume5": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "schemaX", + }, + }, + "nilVolume": nil, + "emptyVolume": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Volumes["volume1"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Volumes["volume2"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Volumes["volume3"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "foobar", b.Config.Resources.Volumes["volume4"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "schemaX", b.Config.Resources.Volumes["volume5"].CreateVolumeRequestContent.SchemaName) + + assert.Nil(t, b.Config.Resources.Volumes["nilVolume"]) + assert.Nil(t, b.Config.Resources.Volumes["emptyVolume"].CreateVolumeRequestContent) +} + +func TestCaptureSchemaDependencyForPipelinesWithTarget(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + 
}, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Schema: "foobar", + }, + }, + "pipeline2": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog2", + Schema: "foobar", + }, + }, + "pipeline3": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Schema: "barfoo", + }, + }, + "pipeline4": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalogX", + Schema: "foobar", + }, + }, + "pipeline5": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Schema: "schemaX", + }, + }, + "pipeline6": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "", + Schema: "foobar", + }, + }, + "pipeline7": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "", + Schema: "", + Name: "whatever", + }, + }, + "nilPipeline": nil, + "emptyPipeline": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Schema) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Schema) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Schema) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Schema) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Schema) + + assert.Nil(t, b.Config.Resources.Pipelines["nilPipeline"]) + assert.Nil(t, b.Config.Resources.Pipelines["emptyPipeline"].CreatePipeline) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Target) + } +} + +func TestCaptureSchemaDependencyForPipelinesWithSchema(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Target: "foobar", + }, + }, + "pipeline2": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog2", + Target: "foobar", + }, + }, + "pipeline3": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Target: "barfoo", + }, + }, + "pipeline4": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalogX", + Target: "foobar", + }, + }, + "pipeline5": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "catalog1", + Target: "schemaX", + }, + }, + "pipeline6": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "", + Target: "foobar", + }, + }, + "pipeline7": { + CreatePipeline: &pipelines.CreatePipeline{ + Catalog: "", + Target: "", + Name: "whatever", + }, + }, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + assert.Equal(t, 
"${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Target) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Target) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Target) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Target) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Target) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Schema) + } +} diff --git a/bundle/config/mutator/expand_pipeline_glob_paths_test.go b/bundle/config/mutator/expand_pipeline_glob_paths_test.go index 7cf3c9f3e..c5b1ad39d 100644 --- a/bundle/config/mutator/expand_pipeline_glob_paths_test.go +++ b/bundle/config/mutator/expand_pipeline_glob_paths_test.go @@ -47,7 +47,7 @@ func TestExpandGlobPathsInPipelines(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ diff --git a/bundle/config/mutator/initialize_urls_test.go b/bundle/config/mutator/initialize_urls_test.go index f07a7deb3..8c751079b 100644 --- a/bundle/config/mutator/initialize_urls_test.go +++ b/bundle/config/mutator/initialize_urls_test.go @@ -31,8 +31,8 @@ func TestInitializeURLs(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "pipeline1": { - ID: "3", - PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}, + ID: "3", + CreatePipeline: &pipelines.CreatePipeline{Name: "pipeline1"}, }, }, Experiments: map[string]*resources.MlflowExperiment{ diff --git a/bundle/config/mutator/initialize_workspace_client.go b/bundle/config/mutator/initialize_workspace_client.go deleted file mode 100644 index 5c905f40c..000000000 --- a/bundle/config/mutator/initialize_workspace_client.go +++ /dev/null @@ -1,26 +0,0 @@ -package mutator - -import ( - "context" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" -) - -type initializeWorkspaceClient struct{} - -func InitializeWorkspaceClient() bundle.Mutator { - return &initializeWorkspaceClient{} -} - -func (m *initializeWorkspaceClient) Name() string { - return "InitializeWorkspaceClient" -} - -// Apply initializes the workspace client for the bundle. We do this here so -// downstream calls to b.WorkspaceClient() do not panic if there's an error in the -// auth configuration. 
-func (m *initializeWorkspaceClient) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { - _, err := b.InitializeWorkspaceClient() - return diag.FromErr(err) -} diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 5c263ac03..dea948fcb 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -32,7 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn } if info.WorktreeRoot == "" { - b.WorktreeRoot = b.BundleRoot + b.WorktreeRoot = b.SyncRoot } else { b.WorktreeRoot = vfs.MustNew(info.WorktreeRoot) } @@ -40,7 +40,6 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn b.Config.Bundle.Git.ActualBranch = info.CurrentBranch if b.Config.Bundle.Git.Branch == "" { // Only load branch if there's no user defined value - b.Config.Bundle.Git.Inferred = true b.Config.Bundle.Git.Branch = info.CurrentBranch } diff --git a/bundle/config/mutator/merge_pipeline_clusters_test.go b/bundle/config/mutator/merge_pipeline_clusters_test.go index f117d9399..97ec44eea 100644 --- a/bundle/config/mutator/merge_pipeline_clusters_test.go +++ b/bundle/config/mutator/merge_pipeline_clusters_test.go @@ -19,7 +19,7 @@ func TestMergePipelineClusters(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Clusters: []pipelines.PipelineCluster{ { NodeTypeId: "i3.xlarge", @@ -68,7 +68,7 @@ func TestMergePipelineClustersCaseInsensitive(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Clusters: []pipelines.PipelineCluster{ { Label: "default", diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 5fd9f53e5..1e6d1f59d 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -1,16 +1,19 @@ package mutator import ( + "context" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/loader" pythonmutator "github.com/databricks/cli/bundle/config/mutator/python" "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/libs/diag" ) -func DefaultMutators() []bundle.Mutator { - return []bundle.Mutator{ +func DefaultMutators(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + return bundle.ApplySeq(ctx, b, loader.EntryPoint(), // Execute preinit script before processing includes. @@ -31,5 +34,5 @@ func DefaultMutators() []bundle.Mutator { // Note: This mutator must run before the target overrides are merged. // See the mutator for more details. 
validate.UniqueResourceKeys(), - } + ) } diff --git a/bundle/config/mutator/prepend_workspace_prefix_test.go b/bundle/config/mutator/prepend_workspace_prefix_test.go index 31393e6bd..d6741f868 100644 --- a/bundle/config/mutator/prepend_workspace_prefix_test.go +++ b/bundle/config/mutator/prepend_workspace_prefix_test.go @@ -80,7 +80,7 @@ func TestPrependWorkspaceForDefaultConfig(t *testing.T) { }, }, } - diags := bundle.Apply(context.Background(), b, bundle.Seq(DefineDefaultWorkspaceRoot(), ExpandWorkspaceRoot(), DefineDefaultWorkspacePaths(), PrependWorkspacePrefix())) + diags := bundle.ApplySeq(context.Background(), b, DefineDefaultWorkspaceRoot(), ExpandWorkspaceRoot(), DefineDefaultWorkspacePaths(), PrependWorkspacePrefix()) require.Empty(t, diags) require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev", b.Config.Workspace.RootPath) require.Equal(t, "/Workspace/Users/jane@doe.com/.bundle/test/dev/artifacts", b.Config.Workspace.ArtifactPath) diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 0fe6bd54f..8ee59d809 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -73,7 +73,7 @@ func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { // this could be surprising since most users (and tools) expect triggers // to be paused in development. // (Note that there still is an exceptional case where users set the trigger - // status to UNPAUSED at the level of an individual object, whic hwas + // status to UNPAUSED at the level of an individual object, which was // historically allowed.) if p.TriggerPauseStatus == config.Unpaused { diags = diags.Append(diag.Diagnostic{ @@ -134,12 +134,7 @@ func findNonUserPath(b *bundle.Bundle) string { return "" } -func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) diag.Diagnostics { - if b.Config.Bundle.Git.Inferred { - env := b.Config.Bundle.Target - log.Warnf(ctx, "target with 'mode: production' should specify an explicit 'targets.%s.git' configuration", env) - } - +func validateProductionMode(b *bundle.Bundle, isPrincipalUsed bool) diag.Diagnostics { r := b.Config.Resources for i := range r.Pipelines { if r.Pipelines[i].Development { @@ -149,8 +144,11 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs // We need to verify that there is only a single deployment of the current target. // The best way to enforce this is to explicitly set root_path. - advice := fmt.Sprintf( - "set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", + advice := "set 'workspace.root_path' to make sure only one copy is deployed" + adviceDetail := fmt.Sprintf( + "A common practice is to use a username or principal name in this path, i.e. use\n"+ + "\n"+ + " root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", b.Config.Workspace.CurrentUser.UserName, ) if !isExplicitRootSet(b) { @@ -159,9 +157,21 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs // and neither is setting a principal. // We only show a warning for these cases since we didn't historically // report an error for them. 
- return diag.Recommendationf("target with 'mode: production' should %s", advice) + return diag.Diagnostics{ + { + Severity: diag.Recommendation, + Summary: "target with 'mode: production' should " + advice, + Detail: adviceDetail, + }, + } + } + return diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "target with 'mode: production' must " + advice, + Detail: adviceDetail, + }, } - return diag.Errorf("target with 'mode: production' must %s", advice) } return nil } @@ -193,7 +203,7 @@ func (m *processTargetMode) Apply(ctx context.Context, b *bundle.Bundle) diag.Di return diags case config.Production: isPrincipal := iamutil.IsServicePrincipal(b.Config.Workspace.CurrentUser.User) - return validateProductionMode(ctx, b, isPrincipal) + return validateProductionMode(b, isPrincipal) case "": // No action default: diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 723b01ee3..d63f3ec86 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -88,7 +88,7 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, Pipelines: map[string]*resources.Pipeline{ - "pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1", Continuous: true}}, + "pipeline1": {CreatePipeline: &pipelines.CreatePipeline{Name: "pipeline1", Continuous: true}}, }, Experiments: map[string]*resources.MlflowExperiment{ "experiment1": {Experiment: &ml.Experiment{Name: "/Users/lennart.kats@databricks.com/experiment1"}}, @@ -163,8 +163,7 @@ func mockBundle(mode config.Mode) *bundle.Bundle { func TestProcessTargetModeDevelopment(t *testing.T) { b := mockBundle(config.Development) - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) // Job 1 @@ -181,7 +180,7 @@ func TestProcessTargetModeDevelopment(t *testing.T) { // Pipeline 1 assert.Equal(t, "[dev lennart] pipeline1", b.Config.Resources.Pipelines["pipeline1"].Name) assert.False(t, b.Config.Resources.Pipelines["pipeline1"].Continuous) - assert.True(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) + assert.True(t, b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Development) // Experiment 1 assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev lennart] experiment1", b.Config.Resources.Experiments["experiment1"].Name) @@ -224,8 +223,7 @@ func TestProcessTargetModeDevelopmentTagNormalizationForAws(t *testing.T) { }) b.Config.Workspace.CurrentUser.ShortName = "Héllö wörld?!" - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) // Assert that tag normalization took place. @@ -239,8 +237,7 @@ func TestProcessTargetModeDevelopmentTagNormalizationForAzure(t *testing.T) { }) b.Config.Workspace.CurrentUser.ShortName = "Héllö wörld?!" - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) // Assert that tag normalization took place (Azure allows more characters than AWS). 
@@ -254,8 +251,7 @@ func TestProcessTargetModeDevelopmentTagNormalizationForGcp(t *testing.T) { }) b.Config.Workspace.CurrentUser.ShortName = "Héllö wörld?!" - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) // Assert that tag normalization took place. @@ -311,12 +307,11 @@ func TestValidateDevelopmentMode(t *testing.T) { func TestProcessTargetModeDefault(t *testing.T) { b := mockBundle("") - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, "job1", b.Config.Resources.Jobs["job1"].Name) assert.Equal(t, "pipeline1", b.Config.Resources.Pipelines["pipeline1"].Name) - assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) + assert.False(t, b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Development) assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) @@ -328,16 +323,16 @@ func TestProcessTargetModeDefault(t *testing.T) { func TestProcessTargetModeProduction(t *testing.T) { b := mockBundle(config.Production) - diags := validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") + diags := validateProductionMode(b, false) + require.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") b.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state" b.Config.Workspace.ArtifactPath = "/Shared/.bundle/x/y/artifacts" b.Config.Workspace.FilePath = "/Shared/.bundle/x/y/files" b.Config.Workspace.ResourcePath = "/Shared/.bundle/x/y/resources" - diags = validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") + diags = validateProductionMode(b, false) + require.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. 
use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") permissions := []resources.Permission{ { @@ -357,12 +352,12 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Permissions = permissions b.Config.Resources.Clusters["cluster1"].Permissions = permissions - diags = validateProductionMode(context.Background(), b, false) + diags = validateProductionMode(b, false) require.NoError(t, diags.Error()) assert.Equal(t, "job1", b.Config.Resources.Jobs["job1"].Name) assert.Equal(t, "pipeline1", b.Config.Resources.Pipelines["pipeline1"].Name) - assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) + assert.False(t, b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Development) assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) @@ -375,11 +370,11 @@ func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { b := mockBundle(config.Production) // Our target has all kinds of problems when not using service principals ... - diags := validateProductionMode(context.Background(), b, false) + diags := validateProductionMode(b, false) require.Error(t, diags.Error()) // ... but we're much less strict when a principal is used - diags = validateProductionMode(context.Background(), b, true) + diags = validateProductionMode(b, true) require.NoError(t, diags.Error()) } @@ -387,7 +382,7 @@ func TestProcessTargetModeProductionOkWithRootPath(t *testing.T) { b := mockBundle(config.Production) // Our target has all kinds of problems when not using service principals ... - diags := validateProductionMode(context.Background(), b, false) + diags := validateProductionMode(b, false) require.Error(t, diags.Error()) // ... 
but we're okay if we specify a root path @@ -396,7 +391,7 @@ func TestProcessTargetModeProductionOkWithRootPath(t *testing.T) { RootPath: "some-root-path", }, } - diags = validateProductionMode(context.Background(), b, false) + diags = validateProductionMode(b, false) require.NoError(t, diags.Error()) } @@ -429,8 +424,7 @@ func TestAllNonUcResourcesAreRenamed(t *testing.T) { reflect.TypeOf(&resources.Volume{}), } - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) resources := reflect.ValueOf(b.Config.Resources) @@ -484,8 +478,7 @@ func TestPrefixAlreadySet(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.NamePrefix = "custom_lennart_deploy_" - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, "custom_lennart_deploy_job1", b.Config.Resources.Jobs["job1"].Name) @@ -498,8 +491,7 @@ func TestTagsAlreadySet(t *testing.T) { "dev": "foo", } - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, "tag", b.Config.Resources.Jobs["job1"].Tags["custom"]) @@ -510,8 +502,7 @@ func TestTagsNil(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.Tags = nil - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, "lennart", b.Config.Resources.Jobs["job2"].Tags["dev"]) @@ -521,8 +512,7 @@ func TestTagsEmptySet(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.Tags = map[string]string{} - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, "lennart", b.Config.Resources.Jobs["job2"].Tags["dev"]) @@ -532,8 +522,7 @@ func TestJobsMaxConcurrentRunsAlreadySet(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.JobsMaxConcurrentRuns = 10 - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, 10, b.Config.Resources.Jobs["job1"].MaxConcurrentRuns) @@ -543,8 +532,7 @@ func TestJobsMaxConcurrentRunsDisabled(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.JobsMaxConcurrentRuns = 1 - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) assert.Equal(t, 1, b.Config.Resources.Jobs["job1"].MaxConcurrentRuns) @@ -554,8 +542,7 @@ func TestTriggerPauseStatusWhenUnpaused(t *testing.T) { b := mockBundle(config.Development) b.Config.Presets.TriggerPauseStatus = config.Unpaused - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := 
bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.ErrorContains(t, diags.Error(), "target with 'mode: development' cannot set trigger pause status to UNPAUSED by default") } @@ -564,9 +551,8 @@ func TestPipelinesDevelopmentDisabled(t *testing.T) { notEnabled := false b.Config.Presets.PipelinesDevelopment = &notEnabled - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(context.Background(), b, m) + diags := bundle.ApplySeq(context.Background(), b, ProcessTargetMode(), ApplyPresets()) require.NoError(t, diags.Error()) - assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) + assert.False(t, b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Development) } diff --git a/bundle/config/mutator/python/python_diagnostics.go b/bundle/config/mutator/python/python_diagnostics.go index 12822065b..7a1e13b4e 100644 --- a/bundle/config/mutator/python/python_diagnostics.go +++ b/bundle/config/mutator/python/python_diagnostics.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) +// pythonDiagnostic is a single entry in diagnostics.json type pythonDiagnostic struct { Severity pythonSeverity `json:"severity"` Summary string `json:"summary"` diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go new file mode 100644 index 000000000..9cb65c302 --- /dev/null +++ b/bundle/config/mutator/python/python_locations.go @@ -0,0 +1,201 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + pathlib "path" + "path/filepath" + + "github.com/databricks/cli/libs/dyn" +) + +// generatedFileName is used as the virtual file name for YAML generated by Python code. +// +// mergePythonLocations replaces dyn.Location values that refer to generatedFileName with locations loaded +// from locations.json +const generatedFileName = "__generated_by_python__.yml" + +// pythonLocations is a data structure for efficient location lookup for a given path +// +// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path. +// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n) +// where n is the number of components in the path. +// +// For example, with locations.json: +// +// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5} +// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5} +// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7} +// +// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 +// +// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, +// because we use the location of the job as the most precise approximation. +// +// See pythonLocationEntry for the structure of a single entry in locations.json +type pythonLocations struct { + // descendants referenced by key, e.g. '.foo' + keys map[string]*pythonLocations + + // descendants referenced by index, e.g.
'[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +// pythonLocationEntry is a single entry in locations.json +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from the Python mutator to the given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. +func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + newLocation, ok := findPythonLocation(locations, path) + if !ok { + return value, nil + } + + // The first item in the list is the "last" location used for error reporting + // + // Loaded YAML uses the virtual file path as its location; we remove any such references, + // because they should use 'newLocation' instead. + // + // We preserve any previous non-virtual locations in case a Python function modified a + // resource defined in YAML. + newLocations := append( + []dyn.Location{newLocation}, + removeVirtualLocations(value.Locations())..., + ) + + return value.WithLocations(newLocations), nil + }) +} + +func removeVirtualLocations(locations []dyn.Location) []dyn.Location { + var newLocations []dyn.Location + + for _, location := range locations { + if filepath.Base(location.File) == generatedFileName { + continue + } + + newLocations = append(newLocations, location) + } + + return newLocations +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// The locations file consists of newline-separated JSON objects with the pythonLocationEntry structure. +func parsePythonLocations(bundleRoot string, input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + // Output can contain both relative paths and absolute paths outside of the bundle root. + // The mutator pipeline expects all paths to be absolute at this point, so make all paths absolute.
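+ // For example, with a bundle root of "/home/user/project" (hypothetical), a relative
+ // "resources/job_0.py" becomes "/home/user/project/resources/job_0.py".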
+ if !pathlib.IsAbs(entry.File) { + entry.File = filepath.Join(bundleRoot, entry.File) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + currentNode := trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. +func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + currentNode := locations + lastLocation := locations.location + exists := locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 000000000..2860af820 --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,195 @@ +package python + +import ( + "bytes" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/require" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", 
[]dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestLoadOutput(t *testing.T) { + location := dyn.Location{File: "my_job.py", Line: 1, Column: 1} + bundleRoot := t.TempDir() + output := `{ + "resources": { + "jobs": { + "my_job": { + "name": "my_job", + "tasks": [ + { + "task_key": "my_task", + "notebook_task": { + "notebook_path": "my_notebook" + } + } + ] + } + } + } + }` + + locations := newPythonLocations() + putPythonLocation( + locations, + dyn.MustPathFromString("resources.jobs.my_job"), + location, + ) + + value, diags := loadOutput( + bundleRoot, + bytes.NewReader([]byte(output)), + locations, + ) + + assert.Equal(t, diag.Diagnostics{}, diags) + + name, err := dyn.Get(value, "resources.jobs.my_job.name") + require.NoError(t, err) + require.Equal(t, []dyn.Location{location}, name.Locations()) + + // until we implement path normalization, we have to keep locations of values + // that change semantic depending on their location + // + // note: it's important to have absolute path including 
'bundleRoot' + // because mutator pipeline already has expanded locations into absolute path + notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path") + require.NoError(t, err) + require.Len(t, notebookPath.Locations(), 1) + require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File) +} + +func TestParsePythonLocations_absolutePath(t *testing.T) { + // output can contain absolute path that is outside of the bundle root + expected := dyn.Location{File: "/Shared/foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "/Shared/foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations("/tmp/", reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} + +func TestParsePythonLocations_relativePath(t *testing.T) { + // output can contain relative paths, we expect all locations to be absolute + // at this stage of mutator pipeline + expected := dyn.Location{File: filepath.Clean("/tmp/my_project/foo.py"), Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(filepath.Clean("/tmp/my_project"), reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 8009ab243..f75f111cf 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,11 +7,14 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" "reflect" "strings" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -124,6 +127,15 @@ type opts struct { enabled bool venvPath string + + loadLocations bool +} + +type runPythonMutatorOpts struct { + cacheDir string + bundleRootPath string + pythonPath string + loadLocations bool } // getOpts adapts deprecated PyDABs and upcoming Python configuration @@ -148,8 +160,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) { // don't execute for phases for 'python' section if phase == PythonMutatorPhaseInit || phase == PythonMutatorPhaseLoad { return opts{ - enabled: true, - venvPath: experimental.PyDABs.VEnvPath, + enabled: true, + venvPath: experimental.PyDABs.VEnvPath, + loadLocations: false, // not supported in PyDABs }, nil } else { return opts{}, nil @@ -158,8 +171,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) { // don't execute for phases for 'pydabs' section if phase == PythonMutatorPhaseLoadResources || phase == PythonMutatorPhaseApplyMutators { return opts{ - enabled: true, - venvPath: experimental.Python.VEnvPath, + enabled: true, + venvPath: experimental.Python.VEnvPath, + loadLocations: true, }, nil } else { return opts{}, nil @@ -194,7 +208,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + bundleRootPath: b.BundleRootPath, + pythonPath: pythonPath, + 
loadLocations: opts.loadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -238,13 +257,14 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-python") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -257,6 +277,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -271,7 +295,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.bundleRootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -307,7 +331,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(opts.bundleRootPath, locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -351,7 +380,21 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0o600) } -func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. 
+func loadLocationsFile(bundleRoot, locationsPath string) (*pythonLocations, error) { + locationsFile, err := os.Open(locationsPath) + if errors.Is(err, fs.ErrNotExist) { + return newPythonLocations(), nil + } else if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(bundleRoot, locationsFile) +} + +func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -359,15 +402,19 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { defer outputFile.Close() + return loadOutput(rootPath, outputFile, locations) +} + +func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. // - // virtualPath has to stay in rootPath, because locations outside root path are not allowed: + // virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed: // // Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_python__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -377,7 +424,29 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + // + // we can remove this once we: + // - add variable interpolation before and after PythonMutator + // - implement path normalization (aka path normal form) + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index d51572c8a..9d957e797 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -7,7 +7,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -55,6 +54,8 @@ 
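A minimal sketch of how the location helpers introduced above fit together (illustration only, not part of the patch): it assumes it lives in the same package as python_locations.go so the unexported identifiers are visible, that fmt, strings, and github.com/databricks/cli/libs/dyn are imported, and that the bundle root and locations.json entries are made up.

func examplePythonLocationsLookup() {
	// Two hypothetical locations.json entries: one for a job, one for a specific task field.
	input := strings.NewReader(
		`{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}`)

	// Relative files are joined with the bundle root, so all stored locations are absolute.
	locations, err := parsePythonLocations("/home/user/project", input)
	if err != nil {
		panic(err)
	}

	// Exact entry: the task_key location is returned.
	loc, ok := findPythonLocation(locations, dyn.MustPathFromString("resources.jobs.job_0.tasks[0].task_key"))
	fmt.Println(loc.File, loc.Line, loc.Column, ok)

	// No entry for email_notifications: the closest ancestor (the job itself) is returned.
	loc, ok = findPythonLocation(locations, dyn.MustPathFromString("resources.jobs.job_0.tasks[0].email_notifications"))
	fmt.Println(loc.File, loc.Line, loc.Column, ok)
}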
func TestPythonMutator_Name_applyMutators(t *testing.T) { func TestPythonMutator_loadResources(t *testing.T) { withFakeVEnv(t, ".venv") + rootPath := filepath.Join(t.TempDir(), "my_project") + b := loadYaml("databricks.yml", ` experimental: python: @@ -65,6 +66,9 @@ func TestPythonMutator_loadResources(t *testing.T) { job0: name: job_0`) + // set rootPath so that we can make absolute paths in dyn.Location + b.BundleRootPath = rootPath + ctx := withProcessStub( t, []string{ @@ -93,6 +97,8 @@ func TestPythonMutator_loadResources(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) mutator := PythonMutator(PythonMutatorPhaseLoadResources) @@ -110,6 +116,25 @@ func TestPythonMutator_loadResources(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: filepath.Join(rootPath, "src/examples/job1.py"), + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, []dyn.Location{ @@ -157,7 +182,7 @@ func TestPythonMutator_loadResources_disallowed(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) @@ -202,7 +227,7 @@ func TestPythonMutator_applyMutators(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseApplyMutators) diag := bundle.Apply(ctx, b, mutator) @@ -224,7 +249,7 @@ func TestPythonMutator_applyMutators(t *testing.T) { description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description")) require.NoError(t, err) - expectedVirtualPath, err := filepath.Abs("__generated_by_python__.yml") + expectedVirtualPath, err := filepath.Abs(generatedFileName) require.NoError(t, err) assert.Equal(t, expectedVirtualPath, description.Location().File) @@ -263,7 +288,7 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) @@ -312,7 +337,7 @@ func TestGetOps_Python(t *testing.T) { }, PythonMutatorPhaseLoadResources) assert.NoError(t, err) - assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: true}, actual) } func TestGetOps_PyDABs(t *testing.T) { @@ -328,7 +353,7 @@ func TestGetOps_PyDABs(t *testing.T) { }, PythonMutatorPhaseInit) assert.NoError(t, err) - assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: false}, actual) } func TestGetOps_empty(t *testing.T) { @@ -661,7 +686,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output, 
diagnostics, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -673,32 +698,51 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0o600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0o600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, []byte(output), 0o600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := range args { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content)) diff --git a/bundle/config/mutator/resolve_resource_references_test.go b/bundle/config/mutator/resolve_resource_references_test.go index 624e337c7..6bd974199 100644 --- a/bundle/config/mutator/resolve_resource_references_test.go +++ b/bundle/config/mutator/resolve_resource_references_test.go @@ -176,7 +176,7 @@ func TestResolveVariableReferencesInVariableLookups(t *testing.T) { {ClusterId: "9876-5432-xywz", ClusterName: "some other cluster"}, }, nil) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ResolveVariableReferencesInLookup(), ResolveResourceReferences())) + diags := bundle.ApplySeq(context.Background(), b, ResolveVariableReferencesInLookup(), ResolveResourceReferences()) require.NoError(t, diags.Error()) require.Equal(t, "cluster-bar-dev", b.Config.Variables["lookup"].Lookup.Cluster) require.Equal(t, "1234-5678-abcd", b.Config.Variables["lookup"].Value) @@ -203,7 +203,7 @@ func TestResolveLookupVariableReferencesInVariableLookups(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ResolveVariableReferencesInLookup(), ResolveResourceReferences())) + diags := bundle.ApplySeq(context.Background(), b, ResolveVariableReferencesInLookup(), ResolveResourceReferences()) require.ErrorContains(t, diags.Error(), "lookup variables cannot contain references to another lookup variables") } @@ -229,7 +229,7 @@ func TestNoResolveLookupIfVariableSetWithEnvVariable(t *testing.T) { 
ctx := context.Background() ctx = env.Set(ctx, "BUNDLE_VAR_lookup", "1234-5678-abcd") - diags := bundle.Apply(ctx, b, bundle.Seq(SetVariables(), ResolveVariableReferencesInLookup(), ResolveResourceReferences())) + diags := bundle.ApplySeq(ctx, b, SetVariables(), ResolveVariableReferencesInLookup(), ResolveResourceReferences()) require.NoError(t, diags.Error()) require.Equal(t, "1234-5678-abcd", b.Config.Variables["lookup"].Value) } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 7ad3dfd8d..9aa93791f 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -3,6 +3,7 @@ package mutator import ( "context" "errors" + "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -13,15 +14,37 @@ import ( "github.com/databricks/cli/libs/dyn/dynvar" ) +/* +For pathological cases, output and time grow exponentially. + +On my laptop, timings for acceptance/bundle/variables/complex-cycle: +rounds time + + 9 0.10s + 10 0.13s + 11 0.27s + 12 0.68s + 13 1.98s + 14 6.28s + 15 21.70s + 16 78.16s +*/ +const maxResolutionRounds = 11 + type resolveVariableReferences struct { - prefixes []string - pattern dyn.Pattern - lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) - skipFn func(dyn.Value) bool + prefixes []string + pattern dyn.Pattern + lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) + skipFn func(dyn.Value) bool + extraRounds int } func ResolveVariableReferences(prefixes ...string) bundle.Mutator { - return &resolveVariableReferences{prefixes: prefixes, lookupFn: lookup} + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + } } func ResolveVariableReferencesInLookup() bundle.Mutator { @@ -32,19 +55,6 @@ func ResolveVariableReferencesInLookup() bundle.Mutator { }, pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("lookup")), lookupFn: lookupForVariables} } -func ResolveVariableReferencesInComplexVariables() bundle.Mutator { - return &resolveVariableReferences{ - prefixes: []string{ - "bundle", - "workspace", - "variables", - }, - pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("value")), - lookupFn: lookupForComplexVariables, - skipFn: skipResolvingInNonComplexVariables, - } -} - func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { if path.String() == "workspace.file_path" { @@ -57,38 +67,6 @@ func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { return dyn.GetByPath(v, path) } -func lookupForComplexVariables(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { - if path[0].Key() != "variables" { - return lookup(v, path, b) - } - - varV, err := dyn.GetByPath(v, path[:len(path)-1]) - if err != nil { - return dyn.InvalidValue, err - } - - var vv variable.Variable - err = convert.ToTyped(&vv, varV) - if err != nil { - return dyn.InvalidValue, err - } - - if vv.Type == variable.VariableTypeComplex { - return dyn.InvalidValue, errors.New("complex variables cannot contain references to another complex variables") - } - - return lookup(v, path, b) -} - -func skipResolvingInNonComplexVariables(v dyn.Value) bool { - switch v.Kind() { - case dyn.KindMap, dyn.KindSequence: - return false - default: - return true - } -} - func lookupForVariables(v dyn.Value, path 
dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if path[0].Key() != "variables" { return lookup(v, path, b) @@ -131,7 +109,36 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) varPath := dyn.NewPath(dyn.Key("var")) var diags diag.Diagnostics + maxRounds := 1 + m.extraRounds + for round := range maxRounds { + hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + + diags = diags.Extend(newDiags) + + if diags.HasError() { + break + } + + if !hasUpdates { + break + } + + if round >= maxRounds-1 { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("Detected unresolved variables after %d resolution rounds", round+1), + // Would be nice to include names of the variables there, but that would complicate things more + }) + break + } + } + return diags +} + +func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn.Path, varPath dyn.Path) (bool, diag.Diagnostics) { + var diags diag.Diagnostics + hasUpdates := false err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -174,6 +181,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if m.skipFn != nil && m.skipFn(v) { return dyn.InvalidValue, dynvar.ErrSkipResolution } + hasUpdates = true return m.lookupFn(normalized, path, b) } } @@ -194,5 +202,6 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if err != nil { diags = diags.Extend(diag.FromErr(err)) } - return diags + + return hasUpdates, diags } diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 18bb022aa..30969dc49 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -7,321 +7,10 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/config/variable" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestResolveVariableReferencesToBundleVariables(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Workspace: config.Workspace{ - RootPath: "${bundle.name}/${var.foo}", - }, - Variables: map[string]*variable.Variable{ - "foo": { - Value: "bar", - }, - }, - }, - } - - // Apply with a valid prefix. This should change the workspace root path. 
- diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "example/bar", b.Config.Workspace.RootPath) -} - -func TestResolveVariableReferencesForPrimitiveNonStringFields(t *testing.T) { - var diags diag.Diagnostics - - b := &bundle.Bundle{ - Config: config.Root{ - Variables: map[string]*variable.Variable{ - "no_alert_for_canceled_runs": {}, - "no_alert_for_skipped_runs": {}, - "min_workers": {}, - "max_workers": {}, - "spot_bid_max_price": {}, - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - NotificationSettings: &jobs.JobNotificationSettings{ - NoAlertForCanceledRuns: false, - NoAlertForSkippedRuns: false, - }, - Tasks: []jobs.Task{ - { - NewCluster: &compute.ClusterSpec{ - Autoscale: &compute.AutoScale{ - MinWorkers: 0, - MaxWorkers: 0, - }, - AzureAttributes: &compute.AzureAttributes{ - SpotBidMaxPrice: 0.0, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Initialize the variables. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.InitializeVariables([]string{ - "no_alert_for_canceled_runs=true", - "no_alert_for_skipped_runs=true", - "min_workers=1", - "max_workers=2", - "spot_bid_max_price=0.5", - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Assign the variables to the dynamic configuration. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - // Set the notification settings. - p = dyn.MustPathFromString("resources.jobs.job1.notification_settings") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_canceled_runs")), dyn.V("${var.no_alert_for_canceled_runs}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_skipped_runs")), dyn.V("${var.no_alert_for_skipped_runs}")) - require.NoError(t, err) - - // Set the min and max workers. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.autoscale") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("min_workers")), dyn.V("${var.min_workers}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("max_workers")), dyn.V("${var.max_workers}")) - require.NoError(t, err) - - // Set the spot bid max price. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.azure_attributes") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("spot_bid_max_price")), dyn.V("${var.spot_bid_max_price}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Apply for the variable prefix. This should resolve the variables to their values. 
- diags = bundle.Apply(context.Background(), b, ResolveVariableReferences("variables")) - require.NoError(t, diags.Error()) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForCanceledRuns) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForSkippedRuns) - assert.Equal(t, 1, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MinWorkers) - assert.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MaxWorkers) - assert.InDelta(t, 0.5, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.AzureAttributes.SpotBidMaxPrice, 0.0001) -} - -func TestResolveComplexVariable(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, ResolveVariableReferences("bundle", "workspace", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NumWorkers) -} - -func TestResolveComplexVariableReferencesWithComplexVariablesError(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - "spark_conf": "${var.spark_conf}", - }, - Type: variable.VariableTypeComplex, - }, - "spark_conf": { - Value: map[string]any{ - "spark.executor.memory": "4g", - "spark.executor.cores": "2", - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. 
- diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq(ResolveVariableReferencesInComplexVariables(), ResolveVariableReferences("bundle", "workspace", "variables"))) - require.ErrorContains(t, diags.Error(), "complex variables cannot contain references to another complex variables") -} - -func TestResolveComplexVariableWithVarReference(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "package_version": { - Value: "1.0.0", - }, - "cluster_libraries": { - Value: [](map[string]any){ - { - "pypi": map[string]string{ - "package": "cicd_template==${var.package_version}", - }, - }, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{}, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("libraries")), dyn.V("${var.cluster_libraries}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq( - ResolveVariableReferencesInComplexVariables(), - ResolveVariableReferences("bundle", "workspace", "variables"), - )) - require.NoError(t, diags.Error()) - require.Equal(t, "cicd_template==1.0.0", b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[0].Pypi.Package) -} - func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCases := []struct { enabled bool @@ -331,7 +20,7 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { true, func(t *testing.T, b *bundle.Bundle) { // Variables that use workspace file path should have SyncRootValue during resolution phase - require.Equal(t, "sync/root/path", b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Configuration["source"]) + require.Equal(t, "sync/root/path", b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Configuration["source"]) // The file path itself should remain the same require.Equal(t, "file/path", b.Config.Workspace.FilePath) @@ -340,7 +29,7 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { { false, func(t *testing.T, b *bundle.Bundle) { - require.Equal(t, "file/path", b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Configuration["source"]) + require.Equal(t, "file/path", b.Config.Resources.Pipelines["pipeline1"].CreatePipeline.Configuration["source"]) require.Equal(t, "file/path", b.Config.Workspace.FilePath) }, }, @@ -359,7 +48,7 @@ func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { Resources: config.Resources{ 
Pipelines: map[string]*resources.Pipeline{ "pipeline1": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Configuration: map[string]string{ "source": "${workspace.file_path}", }, diff --git a/bundle/config/mutator/set_variables.go b/bundle/config/mutator/set_variables.go index 9e9f2dcfe..ac2f660a9 100644 --- a/bundle/config/mutator/set_variables.go +++ b/bundle/config/mutator/set_variables.go @@ -3,11 +3,14 @@ package mutator import ( "context" "fmt" + "os" + "path/filepath" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/jsonloader" "github.com/databricks/cli/libs/env" ) @@ -23,7 +26,11 @@ func (m *setVariables) Name() string { return "SetVariables" } -func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string) (dyn.Value, error) { +func getDefaultVariableFilePath(target string) string { + return ".databricks/bundle/" + target + "/variable-overrides.json" +} + +func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, error) { // case: variable already has value initialized, so skip if variable.HasValue() { return v, nil @@ -49,6 +56,26 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, return v, nil } + // case: Set the variable to the default value from the variable file + if fileDefault.Kind() != dyn.KindInvalid && fileDefault.Kind() != dyn.KindNil { + hasComplexType := variable.IsComplex() + hasComplexValue := fileDefault.Kind() == dyn.KindMap || fileDefault.Kind() == dyn.KindSequence + + if hasComplexType && !hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name) + } + if !hasComplexType && hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name) + } + + v, err := dyn.Set(v, "value", fileDefault) + if err != nil { + return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err) + } + + return v, nil + } + // case: Set the variable to its default value if variable.HasDefault() { vDefault, err := dyn.Get(v, "default") @@ -64,10 +91,43 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, } // We should have had a value to set for the variable at this point. - return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Assignment can be done through the "--var" flag or by setting the %s environment variable`, name, bundleVarPrefix+name) + return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. 
Assignment can be done using "--var", by setting the %s environment variable, or in %s file`, name, bundleVarPrefix+name, getDefaultVariableFilePath("")) +} + +func readVariablesFromFile(b *bundle.Bundle) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + + filePath := filepath.Join(b.BundleRootPath, getDefaultVariableFilePath(b.Config.Bundle.Target)) + if _, err := os.Stat(filePath); err != nil { + return dyn.InvalidValue, nil + } + + f, err := os.ReadFile(filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to read variables file: %w", err)) + } + + val, err := jsonloader.LoadJSON(f, filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse variables file %s: %w", filePath, err)) + } + + if val.Kind() != dyn.KindMap { + return dyn.InvalidValue, diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("failed to parse variables file %s: invalid format", filePath), + Detail: "Variables file must be a JSON object with the following format:\n{\"var1\": \"value1\", \"var2\": \"value2\"}", + }) + } + + return val, nil } func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + defaults, diags := readVariablesFromFile(b) + if diags.HasError() { + return diags + } err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "variables", dyn.Foreach(func(p dyn.Path, variable dyn.Value) (dyn.Value, error) { name := p[1].Key() @@ -76,9 +136,10 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos return dyn.InvalidValue, fmt.Errorf(`variable "%s" is not defined`, name) } - return setVariable(ctx, variable, v, name) + fileDefault, _ := dyn.Get(defaults, name) + return setVariable(ctx, variable, v, name, fileDefault) })) }) - return diag.FromErr(err) + return diags.Extend(diag.FromErr(err)) } diff --git a/bundle/config/mutator/set_variables_test.go b/bundle/config/mutator/set_variables_test.go index 07a5c8214..d904d5be3 100644 --- a/bundle/config/mutator/set_variables_test.go +++ b/bundle/config/mutator/set_variables_test.go @@ -25,7 +25,7 @@ func TestSetVariableFromProcessEnvVar(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -43,7 +43,7 @@ func TestSetVariableUsingDefaultValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -65,7 +65,7 @@ func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -90,7 +90,7 @@ func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = 
convert.ToTyped(&variable, v) @@ -107,8 +107,8 @@ func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") - assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_foo environment variable") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) + assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done using \"--var\", by setting the BUNDLE_VAR_foo environment variable, or in .databricks/bundle//variable-overrides.json file") } func TestSetVariablesMutator(t *testing.T) { @@ -157,6 +157,6 @@ func TestSetComplexVariablesViaEnvVariablesIsNotAllowed(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) assert.ErrorContains(t, err, "setting via environment variables (BUNDLE_VAR_foo) is not supported for complex variable foo") } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 1915cf36e..1eda578fa 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -6,6 +6,7 @@ import ( "fmt" "io/fs" "net/url" + "os" "path" "path/filepath" "strings" @@ -17,6 +18,47 @@ import ( "github.com/databricks/cli/libs/notebook" ) +// TranslateMode specifies how a path should be translated. +type TranslateMode int + +const ( + // TranslateModeNotebook translates a path to a remote notebook. + TranslateModeNotebook TranslateMode = iota + + // TranslateModeFile translates a path to a remote regular file. + TranslateModeFile + + // TranslateModeDirectory translates a path to a remote directory. + TranslateModeDirectory + + // TranslateModeLocalAbsoluteFile translates a path to the local absolute file path. + // It returns an error if the path does not exist or is a directory. + TranslateModeLocalAbsoluteFile + + // TranslateModeLocalAbsoluteDirectory translates a path to the local absolute directory path. + // It returns an error if the path does not exist or is not a directory. + TranslateModeLocalAbsoluteDirectory + + // TranslateModeLocalRelative translates a path to be relative to the bundle sync root path. + // It does not check whether the path exists, nor whether it is a file or a directory. + TranslateModeLocalRelative + + // TranslateModeLocalRelativeWithPrefix translates a path to be relative to the bundle sync root path. + // It adds a "./" prefix to the path if it does not already have one. + // This allows for disambiguating between paths and PyPI package names. + TranslateModeLocalRelativeWithPrefix +) + +// translateOptions controls path translation behavior. +type translateOptions struct { + // Mode specifies how the path should be translated. + Mode TranslateMode + + // AllowPathOutsideSyncRoot can be set for paths that are not tied to the sync root path. + // This is the case for artifact paths, for example.
+ AllowPathOutsideSyncRoot bool +} + type ErrIsNotebook struct { path string } @@ -44,8 +86,6 @@ func (m *translatePaths) Name() string { return "TranslatePaths" } -type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) (string, error) - // translateContext is a context for rewriting paths in a config. // It is freshly instantiated on every mutator apply call. // It provides access to the underlying bundle object such that @@ -56,77 +96,97 @@ type translateContext struct { // seen is a map of local paths to their corresponding remote paths. // If a local path has already been successfully resolved, we do not need to resolve it again. seen map[string]string + + // remoteRoot is the root path of the remote workspace. + // It is equal to ${workspace.file_path} for regular deployments. + // It points to the source root path for source-linked deployments. + remoteRoot string } // rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function // // It takes these arguments: -// - The argument `dir` is the directory relative to which the given relative path is. -// - The given relative path is both passed and written back through `*p`. -// - The argument `fn` is a function that performs the actual rewriting logic. -// This logic is different between regular files or notebooks. +// - The context in which the function is called. +// - The argument `dir` is the directory relative to which the relative path should be interpreted. +// - The argument `input` is the relative path to rewrite. +// - The argument `opts` is a struct that specifies how the path should be rewritten. +// It contains a `Mode` field that specifies how the path should be rewritten. // -// The function returns an error if it is impossible to rewrite the given relative path. +// The function returns the rewritten path if successful, or an error if the path could not be rewritten. +// The returned path is an empty string if the path was not rewritten. func (t *translateContext) rewritePath( + ctx context.Context, dir string, - p *string, - fn rewriteFunc, -) error { + input string, + opts translateOptions, +) (string, error) { // We assume absolute paths point to a location in the workspace - if path.IsAbs(*p) { - return nil + if path.IsAbs(input) { + return "", nil } - url, err := url.Parse(*p) + url, err := url.Parse(input) if err != nil { - return err + return "", err } // If the file path has scheme, it's a full path and we don't need to transform it if url.Scheme != "" { - return nil + return "", nil } // Local path is relative to the directory the resource was defined in. - localPath := filepath.Join(dir, filepath.FromSlash(*p)) + localPath := filepath.Join(dir, input) if interp, ok := t.seen[localPath]; ok { - *p = interp - return nil + return interp, nil } // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. 
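+ // Containment is checked below via filepath.IsLocal on the path relative to the sync root;
+ // callers can opt out with AllowPathOutsideSyncRoot (used, for example, for artifact paths).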
localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { - return err + return "", err } - if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in sync root path", localPath) + if !opts.AllowPathOutsideSyncRoot && !filepath.IsLocal(localRelPath) { + return "", fmt.Errorf("path %s is not contained in sync root path", localPath) } - var workspacePath string - if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { - workspacePath = t.b.SyncRootPath - } else { - workspacePath = t.b.Config.Workspace.FilePath - } - remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath)) + // Normalize paths to be separated by forward slashes. + localPath = filepath.ToSlash(localPath) + localRelPath = filepath.ToSlash(localRelPath) // Convert local path into workspace path via specified function. - interp, err := fn(*p, localPath, localRelPath, remotePath) + var interp string + switch opts.Mode { + case TranslateModeNotebook: + interp, err = t.translateNotebookPath(ctx, input, localPath, localRelPath) + case TranslateModeFile: + interp, err = t.translateFilePath(ctx, input, localPath, localRelPath) + case TranslateModeDirectory: + interp, err = t.translateDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteFile: + interp, err = t.translateLocalAbsoluteFilePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteDirectory: + interp, err = t.translateLocalAbsoluteDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelative: + interp, err = t.translateLocalRelativePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelativeWithPrefix: + interp, err = t.translateLocalRelativeWithPrefixPath(ctx, input, localPath, localRelPath) + default: + return "", fmt.Errorf("unsupported translate mode: %d", opts.Mode) + } if err != nil { - return err + return "", err } - *p = interp t.seen[localPath] = interp - return nil + return interp, nil } -func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { - if filepath.Ext(localFullPath) != notebook.ExtensionNone { + if path.Ext(localFullPath) != notebook.ExtensionNone { return "", fmt.Errorf("notebook %s not found", literal) } @@ -142,7 +202,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe // way we can provide a more targeted error message. for _, ext := range extensions { literalWithExt := literal + ext - localRelPathWithExt := filepath.ToSlash(localRelPath + ext) + localRelPathWithExt := localRelPath + ext if _, err := fs.Stat(t.b.SyncRoot, localRelPathWithExt); err == nil { return "", fmt.Errorf(`notebook %s not found. Did you mean %s? Local notebook references are expected to contain one of the following @@ -162,45 +222,42 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex } // Upon import, notebooks are stripped of their extension. 
- return strings.TrimSuffix(remotePath, filepath.Ext(localFullPath)), nil + localRelPathNoExt := strings.TrimSuffix(localRelPath, path.Ext(localRelPath)) + return path.Join(t.remoteRoot, localRelPathNoExt), nil } -func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", filepath.FromSlash(localFullPath), err) } if nb { return "", ErrIsNotebook{localFullPath} } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if err != nil { return "", err } if !info.IsDir() { - return "", fmt.Errorf("%s is not a directory", localFullPath) + return "", fmt.Errorf("%s is not a directory", filepath.FromSlash(localFullPath)) } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) { - return localRelPath, nil -} - -func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a file: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a file: %w", filepath.FromSlash(localFullPath), err) } if info.IsDir() { return "", fmt.Errorf("expected %s to be a file but found a directory", literal) @@ -208,16 +265,33 @@ func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, l return localFullPath, nil } -func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) { + info, err := os.Stat(filepath.FromSlash(localFullPath)) + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("directory %s not found", literal) + } + if err != nil { + return "", fmt.Errorf("unable to determine if %s is a directory: %w", filepath.FromSlash(localFullPath), err) + } + if !info.IsDir() { + return "", fmt.Errorf("expected %s to be a directory but found a file", literal) + } + return localFullPath, nil +} + +func (t *translateContext) translateLocalRelativePath(ctx 
context.Context, literal, localFullPath, localRelPath string) (string, error) { + return localRelPath, nil +} + +func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { - localRelPath = "." + string(filepath.Separator) + localRelPath + localRelPath = "./" + localRelPath } return localRelPath, nil } -func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { - out := v.MustString() - err := t.rewritePath(dir, &out, fn) +func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.Value, dir string, opts translateOptions) (dyn.Value, error) { + out, err := t.rewritePath(ctx, dir, v.MustString(), opts) if err != nil { if target := (&ErrIsNotebook{}); errors.As(err, target) { return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target) @@ -228,43 +302,38 @@ func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, return dyn.InvalidValue, err } + // If the path was not rewritten, return the original value. + if out == "" { + return v, nil + } + return dyn.NewValue(out, v.Locations()), nil } -func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { - nv, err := t.rewriteValue(p, v, fn, dir) - if err == nil { - return nv, nil - } - - // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. - if fallback != "" { - nv, nerr := t.rewriteValue(p, v, fn, fallback) - if nerr == nil { - // TODO: Emit a warning that this path should be rewritten. - return nv, nil - } - } - - return dyn.InvalidValue, err -} - -func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *translatePaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { t := &translateContext{ b: b, seen: make(map[string]string), } + // Set the remote root to the sync root if source-linked deployment is enabled. + // Otherwise, set it to the workspace file path. + if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + t.remoteRoot = t.b.SyncRootPath + } else { + t.remoteRoot = t.b.Config.Workspace.FilePath + } + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error - for _, fn := range []func(dyn.Value) (dyn.Value, error){ + for _, fn := range []func(context.Context, dyn.Value) (dyn.Value, error){ t.applyJobTranslations, t.applyPipelineTranslations, t.applyArtifactTranslations, t.applyDashboardTranslations, t.applyAppsTranslations, } { - v, err = fn(v) + v, err = fn(ctx, v) if err != nil { return dyn.InvalidValue, err } @@ -275,6 +344,8 @@ func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos return diag.FromErr(err) } +// gatherFallbackPaths collects the fallback paths for relative paths in the configuration. +// Read more about the motivation for this functionality in the "fallback" path translation tests. 
func gatherFallbackPaths(v dyn.Value, typ string) (map[string]string, error) { fallback := make(map[string]string) pattern := dyn.NewPattern(dyn.Key("resources"), dyn.Key(typ), dyn.AnyKey()) diff --git a/bundle/config/mutator/translate_paths_apps.go b/bundle/config/mutator/translate_paths_apps.go index 0ed7e1928..6117ee43f 100644 --- a/bundle/config/mutator/translate_paths_apps.go +++ b/bundle/config/mutator/translate_paths_apps.go @@ -1,12 +1,13 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyAppsTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyAppsTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { // Convert the `source_code_path` field to a remote absolute path. // We use this path for app deployment to point to the source code. pattern := dyn.NewPattern( @@ -16,6 +17,10 @@ func (t *translateContext) applyAppsTranslations(v dyn.Value) (dyn.Value, error) dyn.Key("source_code_path"), ) + opts := translateOptions{ + Mode: TranslateModeDirectory, + } + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() dir, err := v.Location().Directory() @@ -23,6 +28,6 @@ func (t *translateContext) applyAppsTranslations(v dyn.Value) (dyn.Value, error) return dyn.InvalidValue, fmt.Errorf("unable to determine directory for app %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, t.translateDirectoryPath, dir, "") + return t.rewriteValue(ctx, p, v, dir, opts) }) } diff --git a/bundle/config/mutator/translate_paths_artifacts.go b/bundle/config/mutator/translate_paths_artifacts.go index 921c00c73..8e864073f 100644 --- a/bundle/config/mutator/translate_paths_artifacts.go +++ b/bundle/config/mutator/translate_paths_artifacts.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type artifactRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { @@ -22,12 +23,18 @@ func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { return []artifactRewritePattern{ { base.Append(dyn.Key("path")), - t.translateNoOp, + translateOptions{ + Mode: TranslateModeLocalAbsoluteDirectory, + + // Artifact paths may be outside the sync root. + // They are the working directory for artifact builds. 
+ AllowPathOutsideSyncRoot: true, + }, }, } } -func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyArtifactTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error for _, rewritePattern := range t.artifactRewritePatterns() { @@ -38,7 +45,7 @@ func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for artifact %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, "") + return t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_artifacts_test.go b/bundle/config/mutator/translate_paths_artifacts_test.go new file mode 100644 index 000000000..0d1af6156 --- /dev/null +++ b/bundle/config/mutator/translate_paths_artifacts_test.go @@ -0,0 +1,83 @@ +package mutator_test + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTranslatePathsArtifacts_InsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + dir := filepath.Join(tmp, "bundle") + lib := filepath.Join(dir, "my_lib") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir to the sync root. + Path: "../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} + +func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + lib := filepath.Join(tmp, "my_lib") + dir := filepath.Join(tmp, "bundle") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir of the bundle root. + Path: "../../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. 
+ assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} diff --git a/bundle/config/mutator/translate_paths_dashboards.go b/bundle/config/mutator/translate_paths_dashboards.go index 93822a599..18c4c12e2 100644 --- a/bundle/config/mutator/translate_paths_dashboards.go +++ b/bundle/config/mutator/translate_paths_dashboards.go @@ -1,12 +1,13 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyDashboardTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { // Convert the `file_path` field to a local absolute path. // We load the file at this path and use its contents for the dashboard contents. pattern := dyn.NewPattern( @@ -16,6 +17,10 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e dyn.Key("file_path"), ) + opts := translateOptions{ + Mode: TranslateModeLocalAbsoluteFile, + } + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() dir, err := v.Location().Directory() @@ -23,6 +28,6 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e return dyn.InvalidValue, fmt.Errorf("unable to determine directory for dashboard %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, t.retainLocalAbsoluteFilePath, dir, "") + return t.rewriteValue(ctx, p, v, dir, opts) }) } diff --git a/bundle/config/mutator/translate_paths_dashboards_test.go b/bundle/config/mutator/translate_paths_dashboards_test.go index 5e4e69f5d..02fba92e0 100644 --- a/bundle/config/mutator/translate_paths_dashboards_test.go +++ b/bundle/config/mutator/translate_paths_dashboards_test.go @@ -48,7 +48,7 @@ func TestTranslatePathsDashboards_FilePathRelativeSubDirectory(t *testing.T) { // Assert that the file path for the dashboard has been converted to its local absolute path. assert.Equal( t, - filepath.Join(dir, "src", "my_dashboard.lvdash.json"), + filepath.ToSlash(filepath.Join(dir, "src", "my_dashboard.lvdash.json")), b.Config.Resources.Dashboards["dashboard"].FilePath, ) } diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index c29ff0ea9..148ed4466 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "slices" @@ -9,7 +10,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyJobTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "jobs") @@ -38,28 +39,48 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) } - rewritePatternFn, err := t.getRewritePatternFn(kind) + mode, err := getJobTranslateMode(kind) if err != nil { return dyn.InvalidValue, err } - return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + opts := translateOptions{ + Mode: mode, + } + + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. 
+ // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. + return nv, nil + } + } + + return dyn.InvalidValue, err }) } -func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) { +func getJobTranslateMode(kind paths.PathKind) (TranslateMode, error) { switch kind { case paths.PathKindLibrary: - return t.translateNoOp, nil + return TranslateModeLocalRelative, nil case paths.PathKindNotebook: - return t.translateNotebookPath, nil + return TranslateModeNotebook, nil case paths.PathKindWorkspaceFile: - return t.translateFilePath, nil + return TranslateModeFile, nil case paths.PathKindDirectory: - return t.translateDirectoryPath, nil + return TranslateModeDirectory, nil case paths.PathKindWithPrefix: - return t.translateNoOpWithPrefix, nil + return TranslateModeLocalRelativeWithPrefix, nil } - return nil, fmt.Errorf("unsupported path kind: %d", kind) + return TranslateMode(0), fmt.Errorf("unsupported path kind: %d", kind) } diff --git a/bundle/config/mutator/translate_paths_pipelines.go b/bundle/config/mutator/translate_paths_pipelines.go index 71a65e846..204808ff5 100644 --- a/bundle/config/mutator/translate_paths_pipelines.go +++ b/bundle/config/mutator/translate_paths_pipelines.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type pipelineRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { @@ -25,16 +26,16 @@ func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { return []pipelineRewritePattern{ { base.Append(dyn.Key("notebook"), dyn.Key("path")), - t.translateNotebookPath, + translateOptions{Mode: TranslateModeNotebook}, }, { base.Append(dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, + translateOptions{Mode: TranslateModeFile}, }, } } -func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyPipelineTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "pipelines") @@ -50,7 +51,23 @@ func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. + // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], rewritePattern.opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. 
+ return nv, nil + } + } + + return dyn.InvalidValue, err }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 493abb8c5..14d99346e 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "github.com/databricks/cli/bundle" @@ -180,7 +179,7 @@ func TestTranslatePaths(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -226,7 +225,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -251,7 +250,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, ) assert.Equal( @@ -334,7 +333,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { File: &pipelines.FileLibrary{ @@ -362,7 +361,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) assert.Equal( t, - filepath.Join("job", "dist", "task.jar"), + "job/dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar, ) assert.Equal( @@ -489,7 +488,7 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -533,7 +532,7 @@ func TestPipelineNotebookDoesNotExistErrorWithoutExtension(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -573,7 +572,7 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { File: &pipelines.FileLibrary{ @@ -678,7 +677,7 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -713,7 +712,7 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { File: &pipelines.FileLibrary{ @@ -774,8 +773,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) - 
assert.Equal(t, strings.Join([]string{".", "job", "dist", "env1.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) - assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) + assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) + assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) @@ -829,17 +828,15 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { }) require.NoError(t, diags.Error()) - diags = bundle.Apply(ctx, b, - bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences("variables"), - mutator.TranslatePaths(), - )) + diags = bundle.ApplySeq(ctx, b, + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + mutator.TranslatePaths()) require.NoError(t, diags.Error()) assert.Equal( t, - filepath.Join("variables", "local", "whl.whl"), + "variables/local/whl.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } @@ -917,7 +914,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -952,34 +949,34 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { // updated to source path assert.Equal( t, - filepath.Join(dir, "my_job_notebook"), + dir+"/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, ) assert.Equal( t, - filepath.Join(dir, "requirements.txt"), + dir+"/requirements.txt", b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, ) assert.Equal( t, - filepath.Join(dir, "my_pipeline_notebook"), + dir+"/my_pipeline_notebook", b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, ) // left as is assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -989,7 +986,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, ) assert.Equal( diff --git a/bundle/config/resources/pipeline.go b/bundle/config/resources/pipeline.go index 5127d07ba..57d9c4f19 100644 --- a/bundle/config/resources/pipeline.go +++ b/bundle/config/resources/pipeline.go @@ -16,7 +16,7 @@ type Pipeline struct { ModifiedStatus ModifiedStatus 
`json:"modified_status,omitempty" bundle:"internal"` URL string `json:"url,omitempty" bundle:"internal"` - *pipelines.PipelineSpec + *pipelines.CreatePipeline } func (s *Pipeline) UnmarshalJSON(b []byte) error { @@ -59,5 +59,5 @@ func (s *Pipeline) GetURL() string { } func (s *Pipeline) IsNil() bool { - return s.PipelineSpec == nil + return s.CreatePipeline == nil } diff --git a/bundle/config/root.go b/bundle/config/root.go index 21804110a..b974bcec5 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -388,14 +388,6 @@ func (r *Root) MergeTargetOverrides(name string) error { return err } - // If the branch was overridden, we need to clear the inferred flag. - if branch := v.Get("branch"); branch.Kind() != dyn.KindInvalid { - out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.V(false)) - if err != nil { - return err - } - } - // Set the merged value. root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git")), out) if err != nil { diff --git a/bundle/config/validate/single_node_cluster_test.go b/bundle/config/validate/single_node_cluster_test.go index c3ead8ef6..be93420c6 100644 --- a/bundle/config/validate/single_node_cluster_test.go +++ b/bundle/config/validate/single_node_cluster_test.go @@ -238,7 +238,7 @@ func TestValidateSingleNodeClusterFailForPipelineClusters(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Clusters: []pipelines.PipelineCluster{ { SparkConf: tc.sparkConf, @@ -493,7 +493,7 @@ func TestValidateSingleNodeClusterPassPipelineClusters(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Clusters: []pipelines.PipelineCluster{ { SparkConf: tc.sparkConf, diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index f5787a81d..04acd28ab 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -47,15 +47,13 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di var errs errgroup.Group var diags diag.Diagnostics - for i, pattern := range patterns { - index := i - fullPattern := pattern + for index, pattern := range patterns { // If the pattern is negated, strip the negation prefix // and check if the pattern matches any files. 
// Negation in gitignore syntax means "don't look at this path' // So if p matches nothing it's useless negation, but if there are matches, // it means: do not include these files into result set - p := strings.TrimPrefix(fullPattern, "!") + p := strings.TrimPrefix(pattern, "!") errs.Go(func() error { fs, err := fileset.NewGlobSet(rb.BundleRoot(), []string{p}) if err != nil { @@ -72,7 +70,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di mu.Lock() diags = diags.Append(diag.Diagnostic{ Severity: diag.Warning, - Summary: fmt.Sprintf("Pattern %s does not match any files", fullPattern), + Summary: fmt.Sprintf("Pattern %s does not match any files", pattern), Locations: []dyn.Location{loc.Location()}, Paths: []dyn.Path{loc.Path()}, }) diff --git a/bundle/config/variable/variable.go b/bundle/config/variable/variable.go index 95a68cfeb..d7f1cdede 100644 --- a/bundle/config/variable/variable.go +++ b/bundle/config/variable/variable.go @@ -36,11 +36,12 @@ type Variable struct { // This field stores the resolved value for the variable. The variable are // resolved in the following priority order (from highest to lowest) // - // 1. Command line flag. For example: `--var="foo=bar"` - // 2. Target variable. eg: BUNDLE_VAR_foo=bar - // 3. Default value as defined in the applicable environments block - // 4. Default value defined in variable definition - // 5. Throw error, since if no default value is defined, then the variable + // 1. Command line flag `--var="foo=bar"` + // 2. Environment variable. eg: BUNDLE_VAR_foo=bar + // 3. Load defaults from .databricks/bundle//variable-overrides.json + // 4. Default value as defined in the applicable targets block + // 5. Default value defined in variable definition + // 6. Throw error, since if no default value is defined, then the variable // is required Value VariableValue `json:"value,omitempty" bundle:"readonly"` diff --git a/bundle/deferred.go b/bundle/deferred.go deleted file mode 100644 index e7e0c2aeb..000000000 --- a/bundle/deferred.go +++ /dev/null @@ -1,30 +0,0 @@ -package bundle - -import ( - "context" - - "github.com/databricks/cli/libs/diag" -) - -type DeferredMutator struct { - mutator Mutator - finally Mutator -} - -func (d *DeferredMutator) Name() string { - return "deferred" -} - -func Defer(mutator, finally Mutator) Mutator { - return &DeferredMutator{ - mutator: mutator, - finally: finally, - } -} - -func (d *DeferredMutator) Apply(ctx context.Context, b *Bundle) diag.Diagnostics { - var diags diag.Diagnostics - diags = diags.Extend(Apply(ctx, b, d.mutator)) - diags = diags.Extend(Apply(ctx, b, d.finally)) - return diags -} diff --git a/bundle/deferred_test.go b/bundle/deferred_test.go deleted file mode 100644 index ea3df17c4..000000000 --- a/bundle/deferred_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package bundle - -import ( - "context" - "testing" - - "github.com/databricks/cli/libs/diag" - "github.com/stretchr/testify/assert" -) - -type mutatorWithError struct { - applyCalled int - errorMsg string -} - -func (t *mutatorWithError) Name() string { - return "mutatorWithError" -} - -func (t *mutatorWithError) Apply(_ context.Context, b *Bundle) diag.Diagnostics { - t.applyCalled++ - return diag.Errorf(t.errorMsg) // nolint:govet -} - -func TestDeferredMutatorWhenAllMutatorsSucceed(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - m3 := &testMutator{} - cleanup := &testMutator{} - deferredMutator := Defer(Seq(m1, m2, m3), cleanup) - - b := &Bundle{} - diags := Apply(context.Background(), b, 
deferredMutator) - assert.NoError(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, m3.applyCalled) - assert.Equal(t, 1, cleanup.applyCalled) -} - -func TestDeferredMutatorWhenFirstFails(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - mErr := &mutatorWithError{errorMsg: "mutator error occurred"} - cleanup := &testMutator{} - deferredMutator := Defer(Seq(mErr, m1, m2), cleanup) - - b := &Bundle{} - diags := Apply(context.Background(), b, deferredMutator) - assert.ErrorContains(t, diags.Error(), "mutator error occurred") - - assert.Equal(t, 1, mErr.applyCalled) - assert.Equal(t, 0, m1.applyCalled) - assert.Equal(t, 0, m2.applyCalled) - assert.Equal(t, 1, cleanup.applyCalled) -} - -func TestDeferredMutatorWhenMiddleOneFails(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - mErr := &mutatorWithError{errorMsg: "mutator error occurred"} - cleanup := &testMutator{} - deferredMutator := Defer(Seq(m1, mErr, m2), cleanup) - - b := &Bundle{} - diags := Apply(context.Background(), b, deferredMutator) - assert.ErrorContains(t, diags.Error(), "mutator error occurred") - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, mErr.applyCalled) - assert.Equal(t, 0, m2.applyCalled) - assert.Equal(t, 1, cleanup.applyCalled) -} - -func TestDeferredMutatorWhenLastOneFails(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - mErr := &mutatorWithError{errorMsg: "mutator error occurred"} - cleanup := &testMutator{} - deferredMutator := Defer(Seq(m1, m2, mErr), cleanup) - - b := &Bundle{} - diags := Apply(context.Background(), b, deferredMutator) - assert.ErrorContains(t, diags.Error(), "mutator error occurred") - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, mErr.applyCalled) - assert.Equal(t, 1, cleanup.applyCalled) -} - -func TestDeferredMutatorCombinesErrorMessages(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - mErr := &mutatorWithError{errorMsg: "mutator error occurred"} - cleanupErr := &mutatorWithError{errorMsg: "cleanup error occurred"} - deferredMutator := Defer(Seq(m1, m2, mErr), cleanupErr) - - b := &Bundle{} - diags := Apply(context.Background(), b, deferredMutator) - - var errs []string - for _, d := range diags { - errs = append(errs, d.Summary) - } - assert.Contains(t, errs, "mutator error occurred") - assert.Contains(t, errs, "cleanup error occurred") - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, mErr.applyCalled) - assert.Equal(t, 1, cleanupErr.applyCalled) -} diff --git a/bundle/deploy/metadata/annotate_pipelines.go b/bundle/deploy/metadata/annotate_pipelines.go index 990f48907..407aaea6e 100644 --- a/bundle/deploy/metadata/annotate_pipelines.go +++ b/bundle/deploy/metadata/annotate_pipelines.go @@ -20,11 +20,11 @@ func (m *annotatePipelines) Name() string { func (m *annotatePipelines) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { for _, pipeline := range b.Config.Resources.Pipelines { - if pipeline.PipelineSpec == nil { + if pipeline.CreatePipeline == nil { continue } - pipeline.PipelineSpec.Deployment = &pipelines.PipelineDeployment{ + pipeline.CreatePipeline.Deployment = &pipelines.PipelineDeployment{ Kind: pipelines.DeploymentKindBundle, MetadataFilePath: metadataFilePath(b), } diff --git a/bundle/deploy/metadata/annotate_pipelines_test.go b/bundle/deploy/metadata/annotate_pipelines_test.go index 448a022d0..606292724 100644 --- 
a/bundle/deploy/metadata/annotate_pipelines_test.go +++ b/bundle/deploy/metadata/annotate_pipelines_test.go @@ -21,12 +21,12 @@ func TestAnnotatePipelinesMutator(t *testing.T) { Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ "my-pipeline-1": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "My Pipeline One", }, }, "my-pipeline-2": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "My Pipeline Two", }, }, @@ -43,14 +43,14 @@ func TestAnnotatePipelinesMutator(t *testing.T) { Kind: pipelines.DeploymentKindBundle, MetadataFilePath: "/a/b/c/metadata.json", }, - b.Config.Resources.Pipelines["my-pipeline-1"].PipelineSpec.Deployment) + b.Config.Resources.Pipelines["my-pipeline-1"].CreatePipeline.Deployment) assert.Equal(t, &pipelines.PipelineDeployment{ Kind: pipelines.DeploymentKindBundle, MetadataFilePath: "/a/b/c/metadata.json", }, - b.Config.Resources.Pipelines["my-pipeline-2"].PipelineSpec.Deployment) + b.Config.Resources.Pipelines["my-pipeline-2"].CreatePipeline.Deployment) } func TestAnnotatePipelinesMutatorPipelineWithoutASpec(t *testing.T) { diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index b47baa6b2..633d97081 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -54,6 +54,7 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath } diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index c6fa9bddb..64f899695 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -31,7 +31,6 @@ func TestComputeMetadataMutator(t *testing.T) { OriginURL: "www.host.com", Commit: "abcd", BundleRootPath: "a/b/c/d", - Inferred: true, }, }, Resources: config.Resources{ @@ -72,9 +71,6 @@ func TestComputeMetadataMutator(t *testing.T) { OriginURL: "www.host.com", Commit: "abcd", BundleRootPath: "a/b/c/d", - - // Test that this field doesn't carry over into the metadata. - Inferred: false, }, }, Resources: metadata.Resources{ diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index ffe55db71..53d861b32 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -203,7 +203,7 @@ func TestBundleToTerraformForEachTaskLibraries(t *testing.T) { func TestBundleToTerraformPipeline(t *testing.T) { src := resources.Pipeline{ - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "my pipeline", Libraries: []pipelines.PipelineLibrary{ { @@ -419,7 +419,7 @@ func TestBundleToTerraformModelServing(t *testing.T) { src := resources.ModelServingEndpoint{ CreateServingEndpoint: &serving.CreateServingEndpoint{ Name: "name", - Config: serving.EndpointCoreConfigInput{ + Config: &serving.EndpointCoreConfigInput{ ServedModels: []serving.ServedModelInput{ { ModelName: "model_name", @@ -474,7 +474,7 @@ func TestBundleToTerraformModelServingPermissions(t *testing.T) { // and as such observed the `omitempty` tag. 
// The new method leverages [dyn.Value] where any field that is not // explicitly set is not part of the value. - Config: serving.EndpointCoreConfigInput{ + Config: &serving.EndpointCoreConfigInput{ ServedModels: []serving.ServedModelInput{ { ModelName: "model_name", @@ -759,7 +759,7 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "test_pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "test_pipeline", }, }, @@ -898,12 +898,12 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "test_pipeline": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "test_pipeline", }, }, "test_pipeline_new": { - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "test_pipeline_new", }, }, diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index d982354e1..a204222d0 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -54,7 +54,7 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } - binDir, err := b.CacheDir(context.Background(), "bin") + binDir, err := b.CacheDir(ctx, "bin") if err != nil { return "", err } @@ -88,41 +88,43 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } -// This function inherits some environment variables for Terraform CLI. -func inheritEnvVars(ctx context.Context, environ map[string]string) error { +var envCopy = []string{ // Include $HOME in set of environment variables to pass along. - home, ok := env.Lookup(ctx, "HOME") - if ok { - environ["HOME"] = home - } + "HOME", // Include $USERPROFILE in set of environment variables to pass along. // This variable is used by Azure CLI on Windows to find stored credentials and metadata - userProfile, ok := env.Lookup(ctx, "USERPROFILE") - if ok { - environ["USERPROFILE"] = userProfile - } + "USERPROFILE", // Include $PATH in set of environment variables to pass along. // This is necessary to ensure that our Terraform provider can use the // same auxiliary programs (e.g. `az`, or `gcloud`) as the CLI. - path, ok := env.Lookup(ctx, "PATH") - if ok { - environ["PATH"] = path - } + "PATH", - // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. + // Include $AZURE_CONFIG_DIR in set of environment variables to pass along. // This is set in Azure DevOps by the AzureCLI@2 task. - azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") - if ok { - environ["AZURE_CONFIG_FILE"] = azureConfigFile - } + "AZURE_CONFIG_DIR", // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. // See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration - devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE") - if ok { - environ["TF_CLI_CONFIG_FILE"] = devConfigFile + "TF_CLI_CONFIG_FILE", + + // Include $USE_SDK_V2_RESOURCES and $USE_SDK_V2_DATA_SOURCES, these are used to switch back from plugin framework to SDKv2. 
+ // This is used for mitigating issues with resources migrated to the plugin framework, as recommended here: + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/troubleshooting#plugin-framework-migration-problems + // It is currently a workaround for deploying quality_monitors + // https://github.com/databricks/terraform-provider-databricks/issues/4229#issuecomment-2520344690 + "USE_SDK_V2_RESOURCES", + "USE_SDK_V2_DATA_SOURCES", +} + +// This function inherits some environment variables for Terraform CLI. +func inheritEnvVars(ctx context.Context, environ map[string]string) error { + for _, key := range envCopy { + value, ok := env.Lookup(ctx, key) + if ok { + environ[key] = value + } } // Map $DATABRICKS_TF_CLI_CONFIG_FILE to $TF_CLI_CONFIG_FILE diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go index c7a4ffe4a..4645ed007 100644 --- a/bundle/deploy/terraform/init_test.go +++ b/bundle/deploy/terraform/init_test.go @@ -292,7 +292,7 @@ func TestInheritEnvVars(t *testing.T) { t.Setenv("HOME", "/home/testuser") t.Setenv("PATH", "/foo:/bar") t.Setenv("TF_CLI_CONFIG_FILE", "/tmp/config.tfrc") - t.Setenv("AZURE_CONFIG_FILE", "/tmp/foo/bar") + t.Setenv("AZURE_CONFIG_DIR", "/tmp/foo/bar") ctx := context.Background() env := map[string]string{} @@ -301,7 +301,7 @@ func TestInheritEnvVars(t *testing.T) { assert.Equal(t, "/home/testuser", env["HOME"]) assert.Equal(t, "/foo:/bar", env["PATH"]) assert.Equal(t, "/tmp/config.tfrc", env["TF_CLI_CONFIG_FILE"]) - assert.Equal(t, "/tmp/foo/bar", env["AZURE_CONFIG_FILE"]) + assert.Equal(t, "/tmp/foo/bar", env["AZURE_CONFIG_DIR"]) } } diff --git a/bundle/deploy/terraform/load_test.go b/bundle/deploy/terraform/load_test.go index b7243ca19..e892535fe 100644 --- a/bundle/deploy/terraform/load_test.go +++ b/bundle/deploy/terraform/load_test.go @@ -32,10 +32,10 @@ func TestLoadWithNoState(t *testing.T) { t.Setenv("DATABRICKS_TOKEN", "foobar") b.WorkspaceClient() - diags := bundle.Apply(context.Background(), b, bundle.Seq( + diags := bundle.ApplySeq(context.Background(), b, Initialize(), Load(ErrorOnEmptyState), - )) + ) require.ErrorContains(t, diags.Error(), "Did you forget to run 'databricks bundle deploy'") } diff --git a/bundle/deploy/terraform/tfdyn/convert_app.go b/bundle/deploy/terraform/tfdyn/convert_app.go index dcba0809b..b3d599f15 100644 --- a/bundle/deploy/terraform/tfdyn/convert_app.go +++ b/bundle/deploy/terraform/tfdyn/convert_app.go @@ -38,6 +38,12 @@ func (appConverter) Convert(ctx context.Context, key string, vin dyn.Value, out return err } + // We always set no_compute to true as it allows DABs not to wait for app compute to be started when app is created. + vout, err = dyn.Set(vout, "no_compute", dyn.V(true)) + if err != nil { + return err + } + // Add the converted resource to the output. 
out.App[key] = vout.AsAny() diff --git a/bundle/deploy/terraform/tfdyn/convert_app_test.go b/bundle/deploy/terraform/tfdyn/convert_app_test.go index be8152cc6..cdf56f8ed 100644 --- a/bundle/deploy/terraform/tfdyn/convert_app_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_app_test.go @@ -63,6 +63,7 @@ func TestConvertApp(t *testing.T) { assert.Equal(t, map[string]any{ "description": "app description", "name": "app_id", + "no_compute": true, "resources": []any{ map[string]any{ "name": "job1", @@ -136,6 +137,7 @@ func TestConvertAppWithNoDescription(t *testing.T) { assert.Equal(t, map[string]any{ "name": "app_id", "description": "", // Due to Apps API always returning a description field, we set it in the output as well to avoid permanent TF drift + "no_compute": true, "resources": []any{ map[string]any{ "name": "job1", diff --git a/bundle/deploy/terraform/tfdyn/convert_model_serving_endpoint_test.go b/bundle/deploy/terraform/tfdyn/convert_model_serving_endpoint_test.go index d46350bb7..98cf2dc22 100644 --- a/bundle/deploy/terraform/tfdyn/convert_model_serving_endpoint_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_model_serving_endpoint_test.go @@ -17,7 +17,7 @@ func TestConvertModelServingEndpoint(t *testing.T) { src := resources.ModelServingEndpoint{ CreateServingEndpoint: &serving.CreateServingEndpoint{ Name: "name", - Config: serving.EndpointCoreConfigInput{ + Config: &serving.EndpointCoreConfigInput{ ServedModels: []serving.ServedModelInput{ { ModelName: "model_name", diff --git a/bundle/deploy/terraform/tfdyn/convert_pipeline.go b/bundle/deploy/terraform/tfdyn/convert_pipeline.go index ea0c94d66..53a986864 100644 --- a/bundle/deploy/terraform/tfdyn/convert_pipeline.go +++ b/bundle/deploy/terraform/tfdyn/convert_pipeline.go @@ -21,6 +21,11 @@ func convertPipelineResource(ctx context.Context, vin dyn.Value) (dyn.Value, err return dyn.InvalidValue, err } + vout, err = dyn.DropKeys(vout, []string{"allow_duplicate_names", "dry_run"}) + if err != nil { + return dyn.InvalidValue, err + } + // Normalize the output value to the target schema. vout, diags := convert.Normalize(schema.ResourcePipeline{}, vout) for _, diag := range diags { diff --git a/bundle/deploy/terraform/tfdyn/convert_pipeline_test.go b/bundle/deploy/terraform/tfdyn/convert_pipeline_test.go index 0239bad18..63d023c43 100644 --- a/bundle/deploy/terraform/tfdyn/convert_pipeline_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_pipeline_test.go @@ -15,8 +15,17 @@ func TestConvertPipeline(t *testing.T) { src := resources.Pipeline{ - PipelineSpec: &pipelines.PipelineSpec{ + CreatePipeline: &pipelines.CreatePipeline{ Name: "my pipeline", + // This field is not part of the TF schema yet, but once we upgrade to a TF version that supports it, this test will fail because run_as + // will be exposed, which is expected, and the test will need to be updated. + RunAs: &pipelines.RunAs{ + UserName: "foo@bar.com", + }, + // We expect AllowDuplicateNames and DryRun to be ignored and not passed to the TF output. + // This is not supported by TF now, so we don't want to expose it. 
+ AllowDuplicateNames: true, + DryRun: true, Libraries: []pipelines.PipelineLibrary{ { Notebook: &pipelines.NotebookLibrary{ @@ -113,6 +122,9 @@ func TestConvertPipeline(t *testing.T) { "num_workers": int64(1), }, }, + "run_as": map[string]any{ + "user_name": "foo@bar.com", + }, }, out.Pipeline["my_pipeline"]) // Assert equality on the permissions diff --git a/bundle/docsgen/README.md b/bundle/docsgen/README.md new file mode 100644 index 000000000..c8484ab64 --- /dev/null +++ b/bundle/docsgen/README.md @@ -0,0 +1,79 @@ +## docs-autogen + +1. Install [Golang](https://go.dev/doc/install) +2. Run `make vendor docs` from the repo +3. See generated documents in `./bundle/docsgen/output` directory +4. To change descriptions, update content in `./bundle/internal/schema/annotations.yml` or `./bundle/internal/schema/annotations_openapi_overrides.yml` and re-run `make docs` + +For simpler usage, run it together with a copy command to move the resulting files to a local `docs` repo. Note that it will overwrite any local changes in affected files. Example: + +``` +make docs && cp bundle/docsgen/output/*.md ../docs/source/dev-tools/bundles +``` + +To change the intro sections for files, update them in the `templates/` directory + +### Annotation file structure + +```yaml +"": + "": + description: Description of the property, only plain text is supported + markdown_description: Description with markdown support, if defined it will override the value in docs and in JSON-schema + markdown_examples: Custom block for any example, in free form, Markdown is supported + title: JSON-schema title, not used in docs + default: Default value of the property, not used in docs + enum: Possible values of enum-type, not used in docs +``` + +Descriptions with the `PLACEHOLDER` value are not displayed in docs or in the JSON-schema + +All relative links like `[_](/dev-tools/bundles/settings.md#cluster_id)` are kept as-is in docs but converted to absolute links in the JSON schema + +To change the description for a type itself (not its fields), use `"_"`: + +```yaml +github.com/databricks/cli/bundle/config/resources.Cluster: + "_": + "markdown_description": |- + The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). +``` + +### Example annotation + +```yaml +github.com/databricks/cli/bundle/config.Bundle: + "cluster_id": + "description": |- + The ID of a cluster to use to run the bundle. + "markdown_description": |- + The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). + "compute_id": + "description": |- + PLACEHOLDER + "databricks_cli_version": + "description": |- + The Databricks CLI version to use for the bundle. + "markdown_description": |- + The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + "deployment": + "description": |- + The definition of the bundle deployment + "markdown_description": |- + The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + "git": + "description": |- + The Git version control details that are associated with your bundle. + "markdown_description": |- + The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + "name": + "description": |- + The name of the bundle. 
+ "uuid": + "description": |- + PLACEHOLDER +``` + +### TODO + +Add file watcher to track changes in the annotation files and re-run `make docs` script automtically diff --git a/bundle/docsgen/main.go b/bundle/docsgen/main.go new file mode 100644 index 000000000..84bf4779f --- /dev/null +++ b/bundle/docsgen/main.go @@ -0,0 +1,135 @@ +package main + +import ( + "fmt" + "log" + "os" + "path" + "reflect" + "strings" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/internal/annotation" + "github.com/databricks/cli/libs/jsonschema" +) + +const ( + rootFileName = "reference.md" + resourcesFileName = "resources.md" +) + +func main() { + if len(os.Args) != 3 { + fmt.Println("Usage: go run main.go ") + os.Exit(1) + } + + annotationDir := os.Args[1] + docsDir := os.Args[2] + outputDir := path.Join(docsDir, "output") + templatesDir := path.Join(docsDir, "templates") + + if _, err := os.Stat(outputDir); os.IsNotExist(err) { + if err := os.MkdirAll(outputDir, 0o755); err != nil { + log.Fatal(err) + } + } + + rootHeader, err := os.ReadFile(path.Join(templatesDir, rootFileName)) + if err != nil { + log.Fatal(err) + } + err = generateDocs( + []string{path.Join(annotationDir, "annotations.yml")}, + path.Join(outputDir, rootFileName), + reflect.TypeOf(config.Root{}), + string(rootHeader), + ) + if err != nil { + log.Fatal(err) + } + resourcesHeader, err := os.ReadFile(path.Join(templatesDir, resourcesFileName)) + if err != nil { + log.Fatal(err) + } + err = generateDocs( + []string{path.Join(annotationDir, "annotations_openapi.yml"), path.Join(annotationDir, "annotations_openapi_overrides.yml"), path.Join(annotationDir, "annotations.yml")}, + path.Join(outputDir, resourcesFileName), + reflect.TypeOf(config.Resources{}), + string(resourcesHeader), + ) + if err != nil { + log.Fatal(err) + } +} + +func generateDocs(inputPaths []string, outputPath string, rootType reflect.Type, header string) error { + annotations, err := annotation.LoadAndMerge(inputPaths) + if err != nil { + log.Fatal(err) + } + + // schemas is used to resolve references to schemas + schemas := map[string]*jsonschema.Schema{} + // ownFields is used to track fields that are defined in the annotation file and should be included in the docs page + ownFields := map[string]bool{} + + s, err := jsonschema.FromType(rootType, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ + func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + _, isOwnField := annotations[jsonschema.TypePath(typ)] + if isOwnField { + ownFields[jsonschema.TypePath(typ)] = true + } + + refPath := getPath(typ) + shouldHandle := strings.HasPrefix(refPath, "github.com") + if !shouldHandle { + schemas[jsonschema.TypePath(typ)] = &s + return s + } + + a := annotations[refPath] + if a == nil { + a = map[string]annotation.Descriptor{} + } + + rootTypeAnnotation, ok := a["_"] + if ok { + assignAnnotation(&s, rootTypeAnnotation) + } + + for k, v := range s.Properties { + assignAnnotation(v, a[k]) + } + + schemas[jsonschema.TypePath(typ)] = &s + return s + }, + }) + if err != nil { + log.Fatal(err) + } + + nodes := buildNodes(s, schemas, ownFields) + err = buildMarkdown(nodes, outputPath, header) + if err != nil { + log.Fatal(err) + } + return nil +} + +func getPath(typ reflect.Type) string { + return typ.PkgPath() + "." 
+ typ.Name() +} + +func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { + if a.Description != "" && a.Description != annotation.Placeholder { + s.Description = a.Description + } + if a.MarkdownDescription != "" { + s.MarkdownDescription = a.MarkdownDescription + } + if a.MarkdownExamples != "" { + s.Examples = []string{a.MarkdownExamples} + } +} diff --git a/bundle/docsgen/markdown.go b/bundle/docsgen/markdown.go new file mode 100644 index 000000000..b711aa0e8 --- /dev/null +++ b/bundle/docsgen/markdown.go @@ -0,0 +1,116 @@ +package main + +import ( + "fmt" + "log" + "os" + "strings" +) + +func buildMarkdown(nodes []rootNode, outputFile, header string) error { + m := newMardownRenderer() + m = m.PlainText(header) + for _, node := range nodes { + m = m.LF() + title := escapeBrackets(node.Title) + if node.TopLevel { + m = m.H2(title) + } else { + m = m.H3(title) + } + m = m.LF() + + if node.Type != "" { + m = m.PlainText(fmt.Sprintf("**`Type: %s`**", node.Type)) + m = m.LF() + } + m = m.PlainText(node.Description) + m = m.LF() + + if len(node.ObjectKeyAttributes) > 0 { + n := pickLastWord(node.Title) + n = removePluralForm(n) + m = m.CodeBlocks("yaml", fmt.Sprintf("%ss:\n <%s-name>:\n <%s-field-name>: <%s-field-value>", n, n, n, n)) + m = m.LF() + m = buildAttributeTable(m, node.ObjectKeyAttributes) + } else if len(node.ArrayItemAttributes) > 0 { + m = m.LF() + m = buildAttributeTable(m, node.ArrayItemAttributes) + } else if len(node.Attributes) > 0 { + m = m.LF() + m = buildAttributeTable(m, node.Attributes) + } + + if node.Example != "" { + m = m.LF() + m = m.PlainText("**Example**") + m = m.LF() + m = m.PlainText(node.Example) + } + } + + f, err := os.Create(outputFile) + if err != nil { + log.Fatal(err) + } + _, err = f.WriteString(m.String()) + if err != nil { + log.Fatal(err) + } + return f.Close() +} + +func pickLastWord(s string) string { + words := strings.Split(s, ".") + return words[len(words)-1] +} + +// Build a custom table which we use in Databricks website +func buildAttributeTable(m *markdownRenderer, attributes []attributeNode) *markdownRenderer { + m = m.LF() + m = m.PlainText(".. list-table::") + m = m.PlainText(" :header-rows: 1") + m = m.LF() + + m = m.PlainText(" * - Key") + m = m.PlainText(" - Type") + m = m.PlainText(" - Description") + m = m.LF() + + for _, a := range attributes { + m = m.PlainText(" * - " + fmt.Sprintf("`%s`", a.Title)) + m = m.PlainText(" - " + a.Type) + m = m.PlainText(" - " + formatDescription(a)) + m = m.LF() + } + return m +} + +func formatDescription(a attributeNode) string { + s := strings.ReplaceAll(a.Description, "\n", " ") + if a.Link != "" { + if strings.HasSuffix(s, ".") { + s += " " + } else if s != "" { + s += ". 
" + } + s += fmt.Sprintf("See [_](#%s).", cleanAnchor(a.Link)) + } + return s +} + +// Docs framework does not allow special characters in anchor links and strip them out by default +// We need to clean them up to make sure the links pass the validation +func cleanAnchor(s string) string { + s = strings.ReplaceAll(s, "<", "") + s = strings.ReplaceAll(s, ">", "") + s = strings.ReplaceAll(s, ".", "") + + return s +} + +func escapeBrackets(s string) string { + s = strings.ReplaceAll(s, "<", "\\<") + s = strings.ReplaceAll(s, ">", "\\>") + return s +} diff --git a/bundle/docsgen/markdown_test.go b/bundle/docsgen/markdown_test.go new file mode 100644 index 000000000..d4f32230e --- /dev/null +++ b/bundle/docsgen/markdown_test.go @@ -0,0 +1,42 @@ +package main + +import ( + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/stretchr/testify/require" +) + +func TestBuildMarkdownAnchors(t *testing.T) { + nodes := []rootNode{ + { + Title: "some_field", + TopLevel: true, + Type: "Map", + Description: "This is a description", + Attributes: []attributeNode{ + { + Title: "my_attribute", + Type: "Map", + Description: "Desc with link", + Link: "some_field..my_attribute", + }, + }, + }, + { + Title: "some_field..my_attribute", + TopLevel: false, + Type: "Boolean", + Description: "Another description", + }, + } + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "output.md") + + err := buildMarkdown(nodes, path, "Header") + require.NoError(t, err) + + expected := testutil.ReadFile(t, "testdata/anchors.md") + testutil.AssertFileContents(t, path, expected) +} diff --git a/bundle/docsgen/nodes.go b/bundle/docsgen/nodes.go new file mode 100644 index 000000000..6645e9ccc --- /dev/null +++ b/bundle/docsgen/nodes.go @@ -0,0 +1,228 @@ +package main + +import ( + "sort" + "strings" + + "github.com/databricks/cli/libs/jsonschema" +) + +// rootNode is an intermediate representation of resolved JSON-schema item that is used to generate documentation +// Every schema node goes follows this conversion `JSON-schema -> rootNode -> markdown text` +type rootNode struct { + Title string + Description string + Attributes []attributeNode + Example string + ObjectKeyAttributes []attributeNode + ArrayItemAttributes []attributeNode + TopLevel bool + Type string +} + +type attributeNode struct { + Title string + Type string + Description string + Link string +} + +type rootProp struct { + // k is the name of the property + k string + // v is the corresponding json-schema node + v *jsonschema.Schema + // topLevel is true only for direct properties of the schema of root type (e.g. config.Root or config.Resources) + // Example: config.Root has . + topLevel bool + // circular indicates if property was added by recursive type, e.g. 
task.for_each_task.task.for_each_task + // These entries don't expand further and don't add any new nodes from their properties + circular bool +} + +const MapType = "Map" + +// buildNodes converts JSON-schema to a flat list of rootNode items that are then used to generate markdown documentation +// It recursively traverses the schema expanding the resulting list with new items for every properties of nodes `object` and `array` type +func buildNodes(s jsonschema.Schema, refs map[string]*jsonschema.Schema, ownFields map[string]bool) []rootNode { + rootProps := []rootProp{} + for k, v := range s.Properties { + rootProps = append(rootProps, rootProp{k, v, true, false}) + } + nodes := make([]rootNode, 0, len(rootProps)) + visited := make(map[string]bool) + + for i := 0; i < len(rootProps); i++ { + item := rootProps[i] + k := item.k + v := item.v + + if visited[k] { + continue + } + visited[k] = true + + v = resolveRefs(v, refs) + node := rootNode{ + Title: k, + Description: getDescription(v), + TopLevel: item.topLevel, + Example: getExample(v), + Type: getHumanReadableType(v.Type), + } + + hasProperties := len(v.Properties) > 0 + if hasProperties { + node.Attributes = getAttributes(v.Properties, refs, ownFields, k, item.circular) + } + + mapValueType := getMapValueType(v, refs) + if mapValueType != nil { + d := getDescription(mapValueType) + if d != "" { + node.Description = d + } + if node.Example == "" { + node.Example = getExample(mapValueType) + } + node.ObjectKeyAttributes = getAttributes(mapValueType.Properties, refs, ownFields, getMapKeyPrefix(k), item.circular) + } + + arrayItemType := resolveRefs(v.Items, refs) + if arrayItemType != nil { + node.ArrayItemAttributes = getAttributes(arrayItemType.Properties, refs, ownFields, k, item.circular) + } + + nodes = append(nodes, node) + + // Whether we should add new root props from the children of the current JSON-schema node to include their definitions to this document + shouldAddNewProps := !item.circular + if shouldAddNewProps { + newProps := []rootProp{} + // Adds node with definition for the properties. Example: + // bundle: + // prop-name: + if hasProperties { + newProps = append(newProps, extractNodes(k, v.Properties, refs, ownFields)...) + } + + // Adds node with definition for the type of array item. Example: + // permissions: + // - + if arrayItemType != nil { + newProps = append(newProps, extractNodes(k, arrayItemType.Properties, refs, ownFields)...) + } + // Adds node with definition for the type of the Map value. Example: + // targets: + // : + if mapValueType != nil { + newProps = append(newProps, extractNodes(getMapKeyPrefix(k), mapValueType.Properties, refs, ownFields)...) + } + + rootProps = append(rootProps, newProps...) + } + } + + sort.Slice(nodes, func(i, j int) bool { + return nodes[i].Title < nodes[j].Title + }) + return nodes +} + +func getMapValueType(v *jsonschema.Schema, refs map[string]*jsonschema.Schema) *jsonschema.Schema { + additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) + if ok { + return resolveRefs(additionalProps, refs) + } + return nil +} + +func getMapKeyPrefix(s string) string { + return s + "." 
+} + +func removePluralForm(s string) string { + if strings.HasSuffix(s, "s") { + return strings.TrimSuffix(s, "s") + } + return s +} + +func getHumanReadableType(t jsonschema.Type) string { + typesMapping := map[string]string{ + "string": "String", + "integer": "Integer", + "boolean": "Boolean", + "array": "Sequence", + "object": "Map", + } + return typesMapping[string(t)] +} + +func getAttributes(props, refs map[string]*jsonschema.Schema, ownFields map[string]bool, prefix string, circular bool) []attributeNode { + attributes := []attributeNode{} + for k, v := range props { + v = resolveRefs(v, refs) + typeString := getHumanReadableType(v.Type) + if typeString == "" { + typeString = "Any" + } + var reference string + if isReferenceType(v, refs, ownFields) && !circular { + reference = prefix + "." + k + } + attributes = append(attributes, attributeNode{ + Title: k, + Type: typeString, + Description: getDescription(v), + Link: reference, + }) + } + sort.Slice(attributes, func(i, j int) bool { + return attributes[i].Title < attributes[j].Title + }) + return attributes +} + +func getDescription(s *jsonschema.Schema) string { + if s.MarkdownDescription != "" { + return s.MarkdownDescription + } + return s.Description +} + +func shouldExtract(ref string, ownFields map[string]bool) bool { + if i := strings.Index(ref, "github.com"); i >= 0 { + ref = ref[i:] + } + _, isCustomField := ownFields[ref] + return isCustomField +} + +// extractNodes returns a list of rootProp items for all properties of the json-schema node that should be extracted based on context +// E.g. we extract all propert +func extractNodes(prefix string, props, refs map[string]*jsonschema.Schema, ownFields map[string]bool) []rootProp { + nodes := []rootProp{} + for k, v := range props { + if v.Reference != nil && !shouldExtract(*v.Reference, ownFields) { + continue + } + v = resolveRefs(v, refs) + if v.Type == "object" || v.Type == "array" { + nodes = append(nodes, rootProp{prefix + "." 
+ k, v, false, isCycleField(k)}) + } + } + return nodes +} + +func isCycleField(field string) bool { + return field == "for_each_task" +} + +func getExample(v *jsonschema.Schema) string { + examples := getExamples(v.Examples) + if len(examples) == 0 { + return "" + } + return examples[0] +} diff --git a/bundle/docsgen/nodes_test.go b/bundle/docsgen/nodes_test.go new file mode 100644 index 000000000..bdb2ce9db --- /dev/null +++ b/bundle/docsgen/nodes_test.go @@ -0,0 +1,120 @@ +package main + +import ( + "testing" + + "github.com/databricks/cli/libs/jsonschema" + "github.com/stretchr/testify/assert" +) + +func TestBuildNodes_ChildExpansion(t *testing.T) { + tests := []struct { + name string + schema jsonschema.Schema + refs map[string]*jsonschema.Schema + ownFields map[string]bool + wantNodes []rootNode + }{ + { + name: "array expansion", + schema: jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "list": { + Type: "array", + Items: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "listSub": {Reference: strPtr("#/$defs/github.com/listSub")}, + }, + }, + }, + }, + }, + refs: map[string]*jsonschema.Schema{ + "github.com/listSub": {Type: "array", Items: &jsonschema.Schema{Type: "object", Properties: map[string]*jsonschema.Schema{"subField": {Type: "string"}}}}, + }, + ownFields: map[string]bool{"github.com/listSub": true}, + wantNodes: []rootNode{ + { + Title: "list", + TopLevel: true, + Type: "Sequence", + ArrayItemAttributes: []attributeNode{ + {Title: "listSub", Type: "Sequence", Link: "list.listSub"}, + }, + }, + { + Title: "list.listSub", + Type: "Sequence", + ArrayItemAttributes: []attributeNode{ + {Title: "subField", Type: "String"}, + }, + }, + }, + }, + { + name: "map expansion", + schema: jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "myMap": { + Type: "object", + AdditionalProperties: &jsonschema.Schema{ + Reference: strPtr("#/$defs/github.com/myMap"), + Properties: map[string]*jsonschema.Schema{ + "mapSub": {Type: "object", Reference: strPtr("#/$defs/github.com/mapSub")}, + }, + }, + }, + }, + }, + refs: map[string]*jsonschema.Schema{ + "github.com/myMap": { + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "mapSub": {Type: "boolean", Reference: strPtr("#/$defs/github.com/mapSub")}, + }, + }, + "github.com/mapSub": { + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "deepSub": {Type: "boolean"}, + }, + }, + }, + ownFields: map[string]bool{ + "github.com/myMap": true, + "github.com/mapSub": true, + }, + wantNodes: []rootNode{ + { + Title: "myMap", + TopLevel: true, + Type: "Map", + ObjectKeyAttributes: []attributeNode{ + {Title: "mapSub", Type: "Map", Link: "myMap..mapSub"}, + }, + }, + { + Title: "myMap..mapSub", + Type: "Map", + Attributes: []attributeNode{ + {Title: "deepSub", Type: "Boolean"}, + }, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := buildNodes(tt.schema, tt.refs, tt.ownFields) + assert.Equal(t, tt.wantNodes, got) + }) + } +} + +func strPtr(s string) *string { + return &s +} diff --git a/bundle/docsgen/output/reference.md b/bundle/docsgen/output/reference.md new file mode 100644 index 000000000..0de3c6f2b --- /dev/null +++ b/bundle/docsgen/output/reference.md @@ -0,0 +1,1337 @@ +--- +description: Configuration reference for databricks.yml +--- + + + +# Configuration reference + +This article provides reference for keys supported by configuration (YAML). 
See [_](/dev-tools/bundles/index.md). + +For complete bundle examples, see [_](/dev-tools/bundles/resource-examples.md) and the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples). + + +## artifacts + +**`Type: Map`** + +Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [_](/dev-tools/bundles/settings.md#artifacts). + +Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [_](/dev-tools/bundles/artifact-overrides.md). + +```yaml +artifacts: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `build` + - String + - An optional set of non-default build commands to run locally before deployment. + + * - `executable` + - String + - The executable type. Valid values are `bash`, `sh`, and `cmd`. + + * - `files` + - Sequence + - The source files for the artifact. See [_](#artifactsnamefiles). + + * - `path` + - String + - The location where the built artifact will be saved. + + * - `type` + - String + - Required. The type of the artifact. Valid values are `whl`. + + +**Example** + +```yaml +artifacts: + default: + type: whl + build: poetry build + path: . +``` + +### artifacts.\.files + +**`Type: Sequence`** + +The source files for the artifact. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `source` + - String + - Required. The path of the files used to build the artifact. + + +## bundle + +**`Type: Map`** + +The bundle attributes when deploying to this target, + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `cluster_id` + - String + - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). + + * - `compute_id` + - String + - + + * - `databricks_cli_version` + - String + - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + + * - `deployment` + - Map + - The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). See [_](#bundledeployment). + + * - `git` + - Map + - The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). See [_](#bundlegit). + + * - `name` + - String + - The name of the bundle. + + * - `uuid` + - String + - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). + + +### bundle.deployment + +**`Type: Map`** + +The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `fail_on_active_runs` + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + + * - `lock` + - Map + - The deployment lock attributes. See [_](#bundledeploymentlock). + + +### bundle.deployment.lock + +**`Type: Map`** + +The deployment lock attributes. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - Whether this lock is enabled. 
+ + * - `force` + - Boolean + - Whether to force this lock if it is enabled. + + +### bundle.git + +**`Type: Map`** + +The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `branch` + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - `origin_url` + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + +## experimental + +**`Type: Map`** + +Defines attributes for experimental features. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `pydabs` + - Map + - The PyDABs configuration. See [_](#experimentalpydabs). + + * - `python` + - Map + - Configures loading of Python code defined with 'databricks-bundles' package. See [_](#experimentalpython). + + * - `python_wheel_wrapper` + - Boolean + - Whether to use a Python wheel wrapper. + + * - `scripts` + - Map + - The commands to run. + + * - `use_legacy_run_as` + - Boolean + - Whether to use the legacy run_as behavior. + + +### experimental.pydabs + +**`Type: Map`** + +The PyDABs configuration. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - Whether or not PyDABs (Private Preview) is enabled + + * - `import` + - Sequence + - The PyDABs project to import to discover resources, resource generator and mutators + + * - `venv_path` + - String + - The Python virtual environment path + + +### experimental.python + +**`Type: Map`** + +Configures loading of Python code defined with 'databricks-bundles' package. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `mutators` + - Sequence + - Mutators contains a list of fully qualified function paths to mutator functions. Example: ["my_project.mutators:add_default_cluster"] + + * - `resources` + - Sequence + - Resources contains a list of fully qualified function paths to load resources defined in Python code. Example: ["my_project.resources:load_resources"] + + * - `venv_path` + - String + - VEnvPath is path to the virtual environment. If enabled, Python code will execute within this environment. If disabled, it defaults to using the Python interpreter available in the current shell. + + +## include + +**`Type: Sequence`** + +Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) + + +## permissions + +**`Type: Sequence`** + +A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. + +See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. 
+ + +**Example** + +```yaml +permissions: + - level: CAN_VIEW + group_name: test-group + - level: CAN_MANAGE + user_name: someone@example.com + - level: CAN_RUN + service_principal_name: 123456-abcdef +``` + +## presets + +**`Type: Map`** + +Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jobs_max_concurrent_runs` + - Integer + - The maximum concurrent runs for a job. + + * - `name_prefix` + - String + - The prefix for job runs of the bundle. + + * - `pipelines_development` + - Boolean + - Whether pipeline deployments should be locked in development mode. + + * - `source_linked_deployment` + - Boolean + - Whether to link the deployment to the bundle source. + + * - `tags` + - Map + - The tags for the bundle deployment. + + * - `trigger_pause_status` + - String + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. + + +## resources + +**`Type: Map`** + +A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about supported resources, and resource definition reference, see [_](/dev-tools/bundles/resources.md). + +```yaml +resources: + : + : + : +``` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apps` + - Map + - + + * - `clusters` + - Map + - The cluster definitions for the bundle, where each key is the name of a cluster. See [_](/dev-tools/bundles/resources.md#clusters) + + * - `dashboards` + - Map + - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [_](/dev-tools/bundles/resources.md#dashboards) + + * - `experiments` + - Map + - The experiment definitions for the bundle, where each key is the name of the experiment. See [_](/dev-tools/bundles/resources.md#experiments) + + * - `jobs` + - Map + - The job definitions for the bundle, where each key is the name of the job. See [_](/dev-tools/bundles/resources.md#jobs) + + * - `model_serving_endpoints` + - Map + - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [_](/dev-tools/bundles/resources.md#model_serving_endpoints) + + * - `models` + - Map + - The model definitions for the bundle, where each key is the name of the model. See [_](/dev-tools/bundles/resources.md#models) + + * - `pipelines` + - Map + - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [_](/dev-tools/bundles/resources.md#pipelines) + + * - `quality_monitors` + - Map + - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [_](/dev-tools/bundles/resources.md#quality_monitors) + + * - `registered_models` + - Map + - The registered model definitions for the bundle, where each key is the name of the registered model. See [_](/dev-tools/bundles/resources.md#registered_models) + + * - `schemas` + - Map + - The schema definitions for the bundle, where each key is the name of the schema. See [_](/dev-tools/bundles/resources.md#schemas) + + * - `volumes` + - Map + - The volume definitions for the bundle, where each key is the name of the volume. See [_](/dev-tools/bundles/resources.md#volumes) + + +## run_as + +**`Type: Map`** + +The identity to use when running workflows. See [_](/dev-tools/bundles/run-as.md). + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `service_principal_name` + - String + - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - `user_name` + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + +## sync + +**`Type: Map`** + +The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/settings.md#sync). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `exclude` + - Sequence + - A list of files or folders to exclude from the bundle. + + * - `include` + - Sequence + - A list of files or folders to include in the bundle. + + * - `paths` + - Sequence + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. + + +## targets + +**`Type: Map`** + +Defines deployment targets for the bundle. See [_](/dev-tools/bundles/settings.md#targets) + +```yaml +targets: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `artifacts` + - Map + - The artifacts to include in the target deployment. See [_](#targetsnameartifacts). + + * - `bundle` + - Map + - The bundle attributes when deploying to this target. See [_](#targetsnamebundle). + + * - `cluster_id` + - String + - The ID of the cluster to use for this target. + + * - `compute_id` + - String + - Deprecated. The ID of the compute to use for this target. + + * - `default` + - Boolean + - Whether this target is the default target. + + * - `git` + - Map + - The Git version control settings for the target. See [_](#targetsnamegit). + + * - `mode` + - String + - The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md). + + * - `permissions` + - Sequence + - The permissions for deploying and running the bundle in the target. See [_](#targetsnamepermissions). + + * - `presets` + - Map + - The deployment presets for the target. See [_](#targetsnamepresets). + + * - `resources` + - Map + - The resource definitions for the target. See [_](#targetsnameresources). + + * - `run_as` + - Map + - The identity to use to run the bundle, see [_](/dev-tools/bundles/run-as.md). See [_](#targetsnamerun_as). + + * - `sync` + - Map + - The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#targetsnamesync). + + * - `variables` + - Map + - The custom variable definitions for the target. See [_](#targetsnamevariables). + + * - `workspace` + - Map + - The Databricks workspace for the target. See [_](#targetsnameworkspace). + + +### targets.\.artifacts + +**`Type: Map`** + +The artifacts to include in the target deployment. + +```yaml +artifacts: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `build` + - String + - An optional set of non-default build commands to run locally before deployment. + + * - `executable` + - String + - The executable type. Valid values are `bash`, `sh`, and `cmd`. + + * - `files` + - Sequence + - The source files for the artifact. See [_](#targetsnameartifactsnamefiles). + + * - `path` + - String + - The location where the built artifact will be saved. + + * - `type` + - String + - Required. The type of the artifact. Valid values are `whl`. + + +### targets.\.artifacts.\.files + +**`Type: Sequence`** + +The source files for the artifact. + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `source` + - String + - Required. The path of the files used to build the artifact. + + +### targets.\.bundle + +**`Type: Map`** + +The bundle attributes when deploying to this target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `cluster_id` + - String + - The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). + + * - `compute_id` + - String + - + + * - `databricks_cli_version` + - String + - The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version). + + * - `deployment` + - Map + - The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). See [_](#targetsnamebundledeployment). + + * - `git` + - Map + - The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). See [_](#targetsnamebundlegit). + + * - `name` + - String + - The name of the bundle. + + * - `uuid` + - String + - Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). + + +### targets.\.bundle.deployment + +**`Type: Map`** + +The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `fail_on_active_runs` + - Boolean + - Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted. + + * - `lock` + - Map + - The deployment lock attributes. See [_](#targetsnamebundledeploymentlock). + + +### targets.\.bundle.deployment.lock + +**`Type: Map`** + +The deployment lock attributes. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - Whether this lock is enabled. + + * - `force` + - Boolean + - Whether to force this lock if it is enabled. + + +### targets.\.bundle.git + +**`Type: Map`** + +The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `branch` + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - `origin_url` + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + +### targets.\.git + +**`Type: Map`** + +The Git version control settings for the target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `branch` + - String + - The Git branch name. See [_](/dev-tools/bundles/settings.md#git). + + * - `origin_url` + - String + - The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git). + + +### targets.\.permissions + +**`Type: Sequence`** + +The permissions for deploying and running the bundle in the target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. 
+ + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### targets.\.presets + +**`Type: Map`** + +The deployment presets for the target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jobs_max_concurrent_runs` + - Integer + - The maximum concurrent runs for a job. + + * - `name_prefix` + - String + - The prefix for job runs of the bundle. + + * - `pipelines_development` + - Boolean + - Whether pipeline deployments should be locked in development mode. + + * - `source_linked_deployment` + - Boolean + - Whether to link the deployment to the bundle source. + + * - `tags` + - Map + - The tags for the bundle deployment. + + * - `trigger_pause_status` + - String + - A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED. + + +### targets.\.resources + +**`Type: Map`** + +The resource definitions for the target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apps` + - Map + - + + * - `clusters` + - Map + - The cluster definitions for the bundle, where each key is the name of a cluster. See [_](/dev-tools/bundles/resources.md#clusters) + + * - `dashboards` + - Map + - The dashboard definitions for the bundle, where each key is the name of the dashboard. See [_](/dev-tools/bundles/resources.md#dashboards) + + * - `experiments` + - Map + - The experiment definitions for the bundle, where each key is the name of the experiment. See [_](/dev-tools/bundles/resources.md#experiments) + + * - `jobs` + - Map + - The job definitions for the bundle, where each key is the name of the job. See [_](/dev-tools/bundles/resources.md#jobs) + + * - `model_serving_endpoints` + - Map + - The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [_](/dev-tools/bundles/resources.md#model_serving_endpoints) + + * - `models` + - Map + - The model definitions for the bundle, where each key is the name of the model. See [_](/dev-tools/bundles/resources.md#models) + + * - `pipelines` + - Map + - The pipeline definitions for the bundle, where each key is the name of the pipeline. See [_](/dev-tools/bundles/resources.md#pipelines) + + * - `quality_monitors` + - Map + - The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [_](/dev-tools/bundles/resources.md#quality_monitors) + + * - `registered_models` + - Map + - The registered model definitions for the bundle, where each key is the name of the registered model. See [_](/dev-tools/bundles/resources.md#registered_models) + + * - `schemas` + - Map + - The schema definitions for the bundle, where each key is the name of the schema. See [_](/dev-tools/bundles/resources.md#schemas) + + * - `volumes` + - Map + - The volume definitions for the bundle, where each key is the name of the volume. See [_](/dev-tools/bundles/resources.md#volumes) + + +### targets.\.run_as + +**`Type: Map`** + +The identity to use to run the bundle, see [_](/dev-tools/bundles/run-as.md). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `service_principal_name` + - String + - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - `user_name` + - String + - The email of an active workspace user. 
Non-admin users can only set this field to their own email. + + +### targets.\.sync + +**`Type: Map`** + +The local paths to sync to the target workspace when a bundle is run or deployed. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `exclude` + - Sequence + - A list of files or folders to exclude from the bundle. + + * - `include` + - Sequence + - A list of files or folders to include in the bundle. + + * - `paths` + - Sequence + - The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed. + + +### targets.\.variables + +**`Type: Map`** + +The custom variable definitions for the target. + +```yaml +variables: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `default` + - Any + - + + * - `description` + - String + - The description of the variable. + + * - `lookup` + - Map + - The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. See [_](#targetsnamevariablesnamelookup). + + * - `type` + - String + - The type of the variable. + + +### targets.\.variables.\.lookup + +**`Type: Map`** + +The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alert` + - String + - + + * - `cluster` + - String + - + + * - `cluster_policy` + - String + - + + * - `dashboard` + - String + - + + * - `instance_pool` + - String + - + + * - `job` + - String + - + + * - `metastore` + - String + - + + * - `notification_destination` + - String + - + + * - `pipeline` + - String + - + + * - `query` + - String + - + + * - `service_principal` + - String + - + + * - `warehouse` + - String + - + + +### targets.\.workspace + +**`Type: Map`** + +The Databricks workspace for the target. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `artifact_path` + - String + - The artifact path to use within the workspace for both deployments and workflow runs + + * - `auth_type` + - String + - The authentication type. + + * - `azure_client_id` + - String + - The Azure client ID + + * - `azure_environment` + - String + - The Azure environment + + * - `azure_login_app_id` + - String + - The Azure login app ID + + * - `azure_tenant_id` + - String + - The Azure tenant ID + + * - `azure_use_msi` + - Boolean + - Whether to use MSI for Azure + + * - `azure_workspace_resource_id` + - String + - The Azure workspace resource ID + + * - `client_id` + - String + - The client ID for the workspace + + * - `file_path` + - String + - The file path to use within the workspace for both deployments and workflow runs + + * - `google_service_account` + - String + - The Google service account name + + * - `host` + - String + - The Databricks workspace host URL + + * - `profile` + - String + - The Databricks workspace profile name + + * - `resource_path` + - String + - The workspace resource path + + * - `root_path` + - String + - The Databricks workspace root path + + * - `state_path` + - String + - The workspace state path + + +## variables + +**`Type: Map`** + +Defines a custom variable for the bundle. See [_](/dev-tools/bundles/settings.md#variables). + +```yaml +variables: + : + : +``` + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `default` + - Any + - + + * - `description` + - String + - The description of the variable + + * - `lookup` + - Map + - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. See [_](#variablesnamelookup). + + * - `type` + - String + - The type of the variable. + + +### variables.\.lookup + +**`Type: Map`** + +The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alert` + - String + - + + * - `cluster` + - String + - + + * - `cluster_policy` + - String + - + + * - `dashboard` + - String + - + + * - `instance_pool` + - String + - + + * - `job` + - String + - + + * - `metastore` + - String + - + + * - `notification_destination` + - String + - + + * - `pipeline` + - String + - + + * - `query` + - String + - + + * - `service_principal` + - String + - + + * - `warehouse` + - String + - + + +## workspace + +**`Type: Map`** + +Defines the Databricks workspace for the bundle. See [_](/dev-tools/bundles/settings.md#workspace). + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `artifact_path` + - String + - The artifact path to use within the workspace for both deployments and workflow runs + + * - `auth_type` + - String + - The authentication type. + + * - `azure_client_id` + - String + - The Azure client ID + + * - `azure_environment` + - String + - The Azure environment + + * - `azure_login_app_id` + - String + - The Azure login app ID + + * - `azure_tenant_id` + - String + - The Azure tenant ID + + * - `azure_use_msi` + - Boolean + - Whether to use MSI for Azure + + * - `azure_workspace_resource_id` + - String + - The Azure workspace resource ID + + * - `client_id` + - String + - The client ID for the workspace + + * - `file_path` + - String + - The file path to use within the workspace for both deployments and workflow runs + + * - `google_service_account` + - String + - The Google service account name + + * - `host` + - String + - The Databricks workspace host URL + + * - `profile` + - String + - The Databricks workspace profile name + + * - `resource_path` + - String + - The workspace resource path + + * - `root_path` + - String + - The Databricks workspace root path + + * - `state_path` + - String + - The workspace state path + \ No newline at end of file diff --git a/bundle/docsgen/output/resources.md b/bundle/docsgen/output/resources.md new file mode 100644 index 000000000..e1bbc9672 --- /dev/null +++ b/bundle/docsgen/output/resources.md @@ -0,0 +1,8232 @@ +--- +description: Learn about resources supported by Databricks Asset Bundles and how to configure them. +--- + + + +# resources + + allows you to specify information about the resources used by the bundle in the `resources` mapping in the bundle configuration. See [resources mapping](/dev-tools/bundles/settings.md#resources) and [resources key reference](/dev-tools/bundles/reference.md#resources). + +This article outlines supported resource types for bundles and provides details and an example for each supported type. For additional examples, see [_](/dev-tools/bundles/resource-examples.md). 
+ +## Supported resources + +The following table lists supported resource types for bundles. Some resources can be created by defining them in a bundle and deploying the bundle, and some resources only support referencing an existing resource to include in the bundle. + +Resources are defined using the corresponding [Databricks REST API](/api/workspace/introduction) object's create operation request payload, where the object's supported fields, expressed as YAML, are the resource's supported properties. Links to documentation for each resource's corresponding payloads are listed in the table. + +.. tip:: The `databricks bundle validate` command returns warnings if unknown resource properties are found in bundle configuration files. + + +.. list-table:: + :header-rows: 1 + + * - Resource + - Create support + - Corresponding REST API object + + * - [cluster](#cluster) + - ✓ + - [Cluster object](/api/workspace/clusters/create) + + * - [dashboard](#dashboard) + - + - [Dashboard object](/api/workspace/lakeview/create) + + * - [experiment](#experiment) + - ✓ + - [Experiment object](/api/workspace/experiments/createexperiment) + + * - [job](#job) + - ✓ + - [Job object](/api/workspace/jobs/create) + + * - [model (legacy)](#model-legacy) + - ✓ + - [Model (legacy) object](/api/workspace/modelregistry/createmodel) + + * - [model_serving_endpoint](#model-serving-endpoint) + - ✓ + - [Model serving endpoint object](/api/workspace/servingendpoints/create) + + * - [pipeline](#pipeline) + - ✓ + - [Pipeline object](/api/workspace/pipelines/create) + + * - [quality_monitor](#quality-monitor) + - ✓ + - [Quality monitor object](/api/workspace/qualitymonitors/create) + + * - [registered_model](#registered-model) () + - ✓ + - [Registered model object](/api/workspace/registeredmodels/create) + + * - [schema](#schema) () + - ✓ + - [Schema object](/api/workspace/schemas/create) + + * - [volume](#volume) () + - ✓ + - [Volume object](/api/workspace/volumes/create) + + +## apps + +**`Type: Map`** + + + +```yaml +apps: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `active_deployment` + - Map + - See [_](#appsnameactive_deployment). + + * - `app_status` + - Map + - See [_](#appsnameapp_status). + + * - `compute_status` + - Map + - See [_](#appsnamecompute_status). + + * - `config` + - Map + - + + * - `create_time` + - String + - + + * - `creator` + - String + - + + * - `default_source_code_path` + - String + - + + * - `description` + - String + - + + * - `name` + - String + - + + * - `pending_deployment` + - Map + - See [_](#appsnamepending_deployment). + + * - `permissions` + - Sequence + - See [_](#appsnamepermissions). + + * - `resources` + - Sequence + - See [_](#appsnameresources). + + * - `service_principal_client_id` + - String + - + + * - `service_principal_id` + - Integer + - + + * - `service_principal_name` + - String + - + + * - `source_code_path` + - String + - + + * - `update_time` + - String + - + + * - `updater` + - String + - + + * - `url` + - String + - + + +### apps.\.active_deployment + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `create_time` + - String + - + + * - `creator` + - String + - + + * - `deployment_artifacts` + - Map + - See [_](#appsnameactive_deploymentdeployment_artifacts). + + * - `deployment_id` + - String + - + + * - `mode` + - String + - + + * - `source_code_path` + - String + - + + * - `status` + - Map + - See [_](#appsnameactive_deploymentstatus). 
+ + * - `update_time` + - String + - + + +### apps.\.active_deployment.deployment_artifacts + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `source_code_path` + - String + - + + +### apps.\.active_deployment.status + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `message` + - String + - + + * - `state` + - String + - + + +### apps.\.app_status + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `message` + - String + - + + * - `state` + - String + - + + +### apps.\.compute_status + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `message` + - String + - + + * - `state` + - String + - State of the app compute. + + +### apps.\.pending_deployment + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `create_time` + - String + - + + * - `creator` + - String + - + + * - `deployment_artifacts` + - Map + - See [_](#appsnamepending_deploymentdeployment_artifacts). + + * - `deployment_id` + - String + - + + * - `mode` + - String + - + + * - `source_code_path` + - String + - + + * - `status` + - Map + - See [_](#appsnamepending_deploymentstatus). + + * - `update_time` + - String + - + + +### apps.\.pending_deployment.deployment_artifacts + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `source_code_path` + - String + - + + +### apps.\.pending_deployment.status + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `message` + - String + - + + * - `state` + - String + - + + +### apps.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### apps.\.resources + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `description` + - String + - Description of the App Resource. + + * - `job` + - Map + - See [_](#appsnameresourcesjob). + + * - `name` + - String + - Name of the App Resource. + + * - `secret` + - Map + - See [_](#appsnameresourcessecret). + + * - `serving_endpoint` + - Map + - See [_](#appsnameresourcesserving_endpoint). + + * - `sql_warehouse` + - Map + - See [_](#appsnameresourcessql_warehouse). + + +### apps.\.resources.job + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + * - `permission` + - String + - + + +### apps.\.resources.secret + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `key` + - String + - + + * - `permission` + - String + - Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". + + * - `scope` + - String + - + + +### apps.\.resources.serving_endpoint + +**`Type: Map`** + + + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `name` + - String + - + + * - `permission` + - String + - + + +### apps.\.resources.sql_warehouse + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + * - `permission` + - String + - + + +## clusters + +**`Type: Map`** + +The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). + +```yaml +clusters: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apply_policy_default_values` + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + + * - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [_](#clustersnameautoscale). + + * - `autotermination_minutes` + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + + * - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [_](#clustersnameaws_attributes). + + * - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [_](#clustersnameazure_attributes). + + * - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [_](#clustersnamecluster_log_conf). + + * - `cluster_name` + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. + + * - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - `data_security_mode` + - String + - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used with `kind`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. 
* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + + * - `docker_image` + - Map + - See [_](#clustersnamedocker_image). + + * - `driver_instance_pool_id` + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - `enable_elastic_disk` + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + + * - `enable_local_disk_encryption` + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + + * - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [_](#clustersnamegcp_attributes). + + * - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [_](#clustersnameinit_scripts). + + * - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - `is_single_node` + - Boolean + - This field can only be used with `kind`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` + + * - `kind` + - String + - + + * - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. 
For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + + * - `permissions` + - Sequence + - See [_](#clustersnamepermissions). + + * - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - `runtime_engine` + - String + - Determines the cluster's runtime engine, either standard or Photon. This field is not compatible with legacy `spark_version` values that contain `-photon-`. Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. If left unspecified, the runtime engine defaults to standard unless the spark_version contains -photon-, in which case Photon will be used. + + * - `single_user_name` + - String + - Single user name if data_security_mode is `SINGLE_USER` + + * - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + * - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - `spark_version` + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. + + * - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + * - `use_ml_runtime` + - Boolean + - This field can only be used with `kind`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. + + * - `workload_type` + - Map + - See [_](#clustersnameworkload_type). + + +**Example** + +The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: + +```yaml +bundle: + name: clusters + +resources: + clusters: + my_cluster: + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + + jobs: + my_job: + tasks: + - task_key: test_task + notebook_task: + notebook_path: "./src/my_notebook.py" +``` + +### clusters.\.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + + * - `min_workers` + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. + + +### clusters.\.aws_attributes + +**`Type: Map`** + +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + + * - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_type` + - String + - The type of EBS volumes that will be launched with this cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `instance_profile_arn` + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile.
The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. + + * - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - `zone_id` + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", Databricks will try to place the cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + + +### clusters.\.azure_attributes + +**`Type: Map`** + +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [_](#clustersnameazure_attributeslog_analytics_info). + + * - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1.
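+
+For example, a cluster that requests Azure spot instances with a fallback to on-demand capacity, while keeping the driver on an on-demand instance, might be configured as follows; the node type and availability values shown here are illustrative:
+
+```yaml
+resources:
+  clusters:
+    my_azure_cluster:
+      spark_version: "13.3.x-scala2.12"
+      node_type_id: "Standard_DS3_v2"
+      num_workers: 2
+      azure_attributes:
+        first_on_demand: 1
+        availability: SPOT_WITH_FALLBACK_AZURE
+        spot_bid_max_price: -1
+```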
+ + +### clusters.\.azure_attributes.log_analytics_info + +**`Type: Map`** + +Defines values necessary to configure and run Azure Log Analytics agent + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `log_analytics_primary_key` + - String + - + + * - `log_analytics_workspace_id` + - String + - + + +### clusters.\.cluster_log_conf + +**`Type: Map`** + +The configuration for delivering spark logs to a long-term storage destination. +Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#clustersnamecluster_log_confdbfs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#clustersnamecluster_log_confs3). + + +### clusters.\.cluster_log_conf.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### clusters.\.cluster_log_conf.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`.
Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### clusters.\.docker_image + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `basic_auth` + - Map + - See [_](#clustersnamedocker_imagebasic_auth). + + * - `url` + - String + - URL of the docker image. + + +### clusters.\.docker_image.basic_auth + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `password` + - String + - Password of the user + + * - `username` + - String + - Name of the user + + +### clusters.\.gcp_attributes + +**`Type: Map`** + +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + + * - `boot_disk_size` + - Integer + - boot disk size in GB + + * - `google_service_account` + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - `use_preemptible_executors` + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + + * - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + + +### clusters.\.init_scripts + +**`Type: Sequence`** + +The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `abfss` + - Map + - destination needs to be provided. e.g. `{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } }. See [_](#clustersnameinit_scriptsabfss). + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#clustersnameinit_scriptsdbfs). 
+ + * - `file` + - Map + - destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } }`. See [_](#clustersnameinit_scriptsfile). + + * - `gcs` + - Map + - destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [_](#clustersnameinit_scriptsgcs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#clustersnameinit_scriptss3). + + * - `volumes` + - Map + - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [_](#clustersnameinit_scriptsvolumes). + + * - `workspace` + - Map + - destination needs to be provided. e.g. `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`. See [_](#clustersnameinit_scriptsworkspace). + + +### clusters.\.init_scripts.abfss + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } } + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + + +### clusters.\.init_scripts.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### clusters.\.init_scripts.file + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "file" : { "destination" : "file:/my/local/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - local file destination, e.g. `file:/my/local/file.sh` + + +### clusters.\.init_scripts.gcs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + + +### clusters.\.init_scripts.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. 
`s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### clusters.\.init_scripts.volumes + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` + + +### clusters.\.init_scripts.workspace + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + + +### clusters.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### clusters.\.workload_type + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `clients` + - Map + - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. See [_](#clustersnameworkload_typeclients). + + +### clusters.\.workload_type.clients + +**`Type: Map`** + + defined what type of clients can use the cluster. E.g. Notebooks, Jobs + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jobs` + - Boolean + - With jobs set, the cluster can be used for jobs + + * - `notebooks` + - Boolean + - With notebooks set, this cluster can be used for notebooks + + +## dashboards + +**`Type: Map`** + +The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). + +```yaml +dashboards: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `create_time` + - String + - The timestamp of when the dashboard was created. + + * - `dashboard_id` + - String + - UUID identifying the dashboard. + + * - `display_name` + - String + - The display name of the dashboard. 
+ + * - `embed_credentials` + - Boolean + - + + * - `etag` + - String + - The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. + + * - `file_path` + - String + - + + * - `lifecycle_state` + - String + - The state of the dashboard resource. Used for tracking trashed status. + + * - `parent_path` + - String + - The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. + + * - `path` + - String + - The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. + + * - `permissions` + - Sequence + - See [_](#dashboardsnamepermissions). + + * - `serialized_dashboard` + - Any + - The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. + + * - `update_time` + - String + - The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. + + * - `warehouse_id` + - String + - The warehouse ID used to run the dashboard. + + +**Example** + +The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. + +``` yaml +resources: + dashboards: + nyc_taxi_trip_analysis: + display_name: "NYC Taxi Trip Analysis" + file_path: ../src/nyc_taxi_trip_analysis.lvdash.json + warehouse_id: ${var.warehouse_id} +``` +If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). + +In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). + +### dashboards.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +## experiments + +**`Type: Map`** + +The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). + +```yaml +experiments: + : + : +``` + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `artifact_location` + - String + - Location where artifacts for the experiment are stored. + + * - `creation_time` + - Integer + - Creation time + + * - `experiment_id` + - String + - Unique identifier for the experiment. + + * - `last_update_time` + - Integer + - Last update time + + * - `lifecycle_stage` + - String + - Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. + + * - `name` + - String + - Human readable name that identifies the experiment. + + * - `permissions` + - Sequence + - See [_](#experimentsnamepermissions). + + * - `tags` + - Sequence + - Tags: Additional metadata key-value pairs. See [_](#experimentsnametags). + + +**Example** + +The following example defines an experiment that all users can view: + +```yaml +resources: + experiments: + experiment: + name: my_ml_experiment + permissions: + - level: CAN_READ + group_name: users + description: MLflow experiment used to track runs +``` + +### experiments.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### experiments.\.tags + +**`Type: Sequence`** + +Tags: Additional metadata key-value pairs. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `key` + - String + - The tag key. + + * - `value` + - String + - The tag value. + + +## jobs + +**`Type: Map`** + +The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). + +```yaml +jobs: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `budget_policy_id` + - String + - The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. + + * - `continuous` + - Map + - An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. See [_](#jobsnamecontinuous). + + * - `deployment` + - Map + - Deployment information for jobs managed by external sources. See [_](#jobsnamedeployment). + + * - `description` + - String + - An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. + + * - `edit_mode` + - String + - Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. + + * - `email_notifications` + - Map + - An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. See [_](#jobsnameemail_notifications). 
+ + * - `environments` + - Sequence + - A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. See [_](#jobsnameenvironments). + + * - `format` + - String + - Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + + * - `git_source` + - Map + - An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. See [_](#jobsnamegit_source). + + * - `health` + - Map + - An optional set of health rules that can be defined for this job. See [_](#jobsnamehealth). + + * - `job_clusters` + - Sequence + - A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. See [_](#jobsnamejob_clusters). + + * - `max_concurrent_runs` + - Integer + - An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. + + * - `name` + - String + - An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + + * - `notification_settings` + - Map + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. See [_](#jobsnamenotification_settings). + + * - `parameters` + - Sequence + - Job-level parameter definitions. See [_](#jobsnameparameters). + + * - `performance_target` + - String + - PerformanceTarget defines how performant or cost efficient the execution of run on serverless should be. + + * - `permissions` + - Sequence + - See [_](#jobsnamepermissions). + + * - `queue` + - Map + - The queue settings of the job. See [_](#jobsnamequeue). + + * - `run_as` + - Map + - Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. 
See [_](#jobsnamerun_as). + + * - `schedule` + - Map + - An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [_](#jobsnameschedule). + + * - `tags` + - Map + - A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + + * - `tasks` + - Sequence + - A list of task specifications to be executed by this job. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. See [_](#jobsnametasks). + + * - `timeout_seconds` + - Integer + - An optional timeout applied to each run of this job. A value of `0` means no timeout. + + * - `trigger` + - Map + - A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. See [_](#jobsnametrigger). + + * - `webhook_notifications` + - Map + - A collection of system notification IDs to notify when runs of this job begin or complete. See [_](#jobsnamewebhook_notifications). + + +**Example** + +The following example defines a job with the resource key `hello-job` with one notebook task: + +```yaml +resources: + jobs: + hello-job: + name: hello-job + tasks: + - task_key: hello-task + notebook_task: + notebook_path: ./hello.py +``` + +For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). + +### jobs.\.continuous + +**`Type: Map`** + +An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `pause_status` + - String + - Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. + + +### jobs.\.deployment + +**`Type: Map`** + +Deployment information for jobs managed by external sources. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `kind` + - String + - The kind of deployment that manages the job. * `BUNDLE`: The job is managed by Databricks Asset Bundle. + + * - `metadata_file_path` + - String + - Path of the file that contains deployment metadata. + + +### jobs.\.email_notifications + +**`Type: Map`** + +An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `no_alert_for_skipped_runs` + - Boolean + - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. + + * - `on_duration_warning_threshold_exceeded` + - Sequence + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + + * - `on_failure` + - Sequence + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + + * - `on_start` + - Sequence + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + * - `on_streaming_backlog_exceeded` + - Sequence + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. + + * - `on_success` + - Sequence + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + +### jobs.\.environments + +**`Type: Sequence`** + +A list of task execution environment specifications that can be referenced by serverless tasks of this job. +An environment is required to be present for serverless tasks. +For serverless notebook tasks, the environment is accessible in the notebook environment panel. +For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `environment_key` + - String + - The key of an environment. It has to be unique within a job. + + * - `spec` + - Map + - The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task. In this minimal environment spec, only pip dependencies are supported. See [_](#jobsnameenvironmentsspec). + + +### jobs.\.environments.spec + +**`Type: Map`** + +The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task. +In this minimal environment spec, only pip dependencies are supported. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `client` + - String + - Client version used by the environment The client is the user-facing environment of the runtime. Each client comes with a specific set of pre-installed libraries. The version is a string, consisting of the major client version. + + * - `dependencies` + - Sequence + - List of pip dependencies, as supported by the version of pip in this environment. Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ Allowed dependency could be , , (WSFS or Volumes in Databricks), E.g. dependencies: ["foo==0.0.1", "-r /Workspace/test/requirements.txt"] + + +### jobs.\.git_source + +**`Type: Map`** + +An optional specification for a remote Git repository containing the source code used by tasks. 
Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. + +If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. + +Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `git_branch` + - String + - Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. + + * - `git_commit` + - String + - Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + + * - `git_provider` + - String + - Unique identifier of the service used to host the Git repository. The value is case insensitive. + + * - `git_snapshot` + - Map + - Read-only state of the remote repository at the time the job was run. This field is only included on job runs. See [_](#jobsnamegit_sourcegit_snapshot). + + * - `git_tag` + - String + - Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. + + * - `git_url` + - String + - URL of the repository to be cloned by this job. + + * - `job_source` + - Map + - The source of the job specification in the remote repository when the job is source controlled. See [_](#jobsnamegit_sourcejob_source). + + +### jobs.\.git_source.git_snapshot + +**`Type: Map`** + +Read-only state of the remote repository at the time the job was run. This field is only included on job runs. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `used_commit` + - String + - Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. + + +### jobs.\.git_source.job_source + +**`Type: Map`** + +The source of the job specification in the remote repository when the job is source controlled. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dirty_state` + - String + - Dirty state indicates the job is not fully synced with the job specification in the remote repository. Possible values are: * `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced. * `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced. + + * - `import_from_git_branch` + - String + - Name of the branch which the job is imported from. + + * - `job_config_path` + - String + - Path of the job YAML file that contains the job specification. + + +### jobs.\.health + +**`Type: Map`** + +An optional set of health rules that can be defined for this job. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `rules` + - Sequence + - See [_](#jobsnamehealthrules). + + +### jobs.\.health.rules + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `metric` + - String + - Specifies the health metric that is being evaluated for a particular health rule. 
* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. + + * - `op` + - String + - Specifies the operator used to compare the health metric value with the specified threshold. + + * - `value` + - Integer + - Specifies the threshold value that the health metric should obey to satisfy the health rule. + + +### jobs.\.job_clusters + +**`Type: Sequence`** + +A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. +If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `job_cluster_key` + - String + - A unique name for the job cluster. This field is required and must be unique within the job. `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. + + * - `new_cluster` + - Map + - If new_cluster, a description of a cluster that is created for each task. See [_](#jobsnamejob_clustersnew_cluster). + + +### jobs.\.job_clusters.new_cluster + +**`Type: Map`** + +If new_cluster, a description of a cluster that is created for each task. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apply_policy_default_values` + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + + * - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [_](#jobsnamejob_clustersnew_clusterautoscale). + + * - `autotermination_minutes` + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + + * - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [_](#jobsnamejob_clustersnew_clusteraws_attributes). + + * - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [_](#jobsnamejob_clustersnew_clusterazure_attributes). + + * - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [_](#jobsnamejob_clustersnew_clustercluster_log_conf). + + * - `cluster_name` + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. + + * - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - `data_security_mode` + - String + - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used with `kind`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + + * - `docker_image` + - Map + - See [_](#jobsnamejob_clustersnew_clusterdocker_image). + + * - `driver_instance_pool_id` + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - `enable_elastic_disk` + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + + * - `enable_local_disk_encryption` + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + + * - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. 
See [_](#jobsnamejob_clustersnew_clustergcp_attributes). + + * - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [_](#jobsnamejob_clustersnew_clusterinit_scripts). + + * - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - `is_single_node` + - Boolean + - This field can only be used with `kind`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` + + * - `kind` + - String + - + + * - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + + * - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - `runtime_engine` + - String + - Determines the cluster's runtime engine, either standard or Photon. This field is not compatible with legacy `spark_version` values that contain `-photon-`. Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. If left unspecified, the runtime engine defaults to standard unless the spark_version contains -photon-, in which case Photon will be used. + + * - `single_user_name` + - String + - Single user name if data_security_mode is `SINGLE_USER` + + * - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + * - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - `spark_version` + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. 
+ + * - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + * - `use_ml_runtime` + - Boolean + - This field can only be used with `kind`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. + + * - `workload_type` + - Map + - See [_](#jobsnamejob_clustersnew_clusterworkload_type). + + +### jobs.\.job_clusters.new_cluster.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + + * - `min_workers` + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. + + +### jobs.\.job_clusters.new_cluster.aws_attributes + +**`Type: Map`** + +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + + * - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_type` + - String + - The type of EBS volumes that will be launched with this cluster. 
+ + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `instance_profile_arn` + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. + + * - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - `zone_id` + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + + +### jobs.\.job_clusters.new_cluster.azure_attributes + +**`Type: Map`** + +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. 
If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [_](#jobsnamejob_clustersnew_clusterazure_attributeslog_analytics_info). + + * - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + + +### jobs.\.job_clusters.new_cluster.azure_attributes.log_analytics_info + +**`Type: Map`** + +Defines values necessary to configure and run Azure Log Analytics agent + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `log_analytics_primary_key` + - String + - + + * - `log_analytics_workspace_id` + - String + - + + +### jobs.\.job_clusters.new_cluster.cluster_log_conf + +**`Type: Map`** + +The configuration for delivering spark logs to a long-term storage destination. +Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#jobsnamejob_clustersnew_clustercluster_log_confdbfs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#jobsnamejob_clustersnew_clustercluster_log_confs3). + + +### jobs.\.job_clusters.new_cluster.cluster_log_conf.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### jobs.\.job_clusters.new_cluster.cluster_log_conf.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. 
If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### jobs.\.job_clusters.new_cluster.docker_image + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `basic_auth` + - Map + - See [_](#jobsnamejob_clustersnew_clusterdocker_imagebasic_auth). + + * - `url` + - String + - URL of the docker image. + + +### jobs.\.job_clusters.new_cluster.docker_image.basic_auth + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `password` + - String + - Password of the user + + * - `username` + - String + - Name of the user + + +### jobs.\.job_clusters.new_cluster.gcp_attributes + +**`Type: Map`** + +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + + * - `boot_disk_size` + - Integer + - boot disk size in GB + + * - `google_service_account` + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - `use_preemptible_executors` + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). 
Note: Soon to be deprecated, use the availability field instead. + + * - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + + +### jobs.\.job_clusters.new_cluster.init_scripts + +**`Type: Sequence`** + +The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `abfss` + - Map + - destination needs to be provided. e.g. `{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } }. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsabfss). + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsdbfs). + + * - `file` + - Map + - destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } }`. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsfile). + + * - `gcs` + - Map + - destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsgcs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#jobsnamejob_clustersnew_clusterinit_scriptss3). + + * - `volumes` + - Map + - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsvolumes). + + * - `workspace` + - Map + - destination needs to be provided. e.g. `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`. See [_](#jobsnamejob_clustersnew_clusterinit_scriptsworkspace). + + +### jobs.\.job_clusters.new_cluster.init_scripts.abfss + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } } + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + + +### jobs.\.job_clusters.new_cluster.init_scripts.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### jobs.\.job_clusters.new_cluster.init_scripts.file + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "file" : { "destination" : "file:/my/local/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - local file destination, e.g. 
`file:/my/local/file.sh` + + +### jobs.\.job_clusters.new_cluster.init_scripts.gcs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + + +### jobs.\.job_clusters.new_cluster.init_scripts.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### jobs.\.job_clusters.new_cluster.init_scripts.volumes + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` + + +### jobs.\.job_clusters.new_cluster.init_scripts.workspace + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + + +### jobs.\.job_clusters.new_cluster.workload_type + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `clients` + - Map + - defined what type of clients can use the cluster. E.g. Notebooks, Jobs. 
See [_](#jobsnamejob_clustersnew_clusterworkload_typeclients). + + +### jobs.\.job_clusters.new_cluster.workload_type.clients + +**`Type: Map`** + + defined what type of clients can use the cluster. E.g. Notebooks, Jobs + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jobs` + - Boolean + - With jobs set, the cluster can be used for jobs + + * - `notebooks` + - Boolean + - With notebooks set, this cluster can be used for notebooks + + +### jobs.\.notification_settings + +**`Type: Map`** + +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `no_alert_for_canceled_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + + * - `no_alert_for_skipped_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + + +### jobs.\.parameters + +**`Type: Sequence`** + +Job-level parameter definitions + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `default` + - String + - Default value of the parameter. + + * - `name` + - String + - The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` + + +### jobs.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### jobs.\.queue + +**`Type: Map`** + +The queue settings of the job. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - If true, enable queueing for the job. This is a required field. + + +### jobs.\.run_as + +**`Type: Map`** + +Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. + +Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `service_principal_name` + - String + - The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - `user_name` + - String + - The email of an active workspace user. Non-admin users can only set this field to their own email. + + +### jobs.\.schedule + +**`Type: Map`** + +An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `pause_status` + - String + - Indicate whether this schedule is paused or not. + + * - `quartz_cron_expression` + - String + - A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. 
+ + * - `timezone_id` + - String + - A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. + + +### jobs.\.tasks + +**`Type: Sequence`** + +A list of task specifications to be executed by this job. +If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `clean_rooms_notebook_task` + - Map + - The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook when the `clean_rooms_notebook_task` field is present. See [_](#jobsnametasksclean_rooms_notebook_task). + + * - `condition_task` + - Map + - The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. The condition task does not require a cluster to execute and does not support retries or notifications. See [_](#jobsnametaskscondition_task). + + * - `dbt_task` + - Map + - The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. See [_](#jobsnametasksdbt_task). + + * - `depends_on` + - Sequence + - An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. The key is `task_key`, and the value is the name assigned to the dependent task. See [_](#jobsnametasksdepends_on). + + * - `description` + - String + - An optional description for this task. + + * - `disable_auto_optimization` + - Boolean + - An option to disable auto optimization in serverless + + * - `email_notifications` + - Map + - An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. See [_](#jobsnametasksemail_notifications). + + * - `environment_key` + - String + - The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute. + + * - `existing_cluster_id` + - String + - If existing_cluster_id, the ID of an existing cluster that is used for all runs. When running jobs or tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs and tasks on new clusters for greater reliability + + * - `for_each_task` + - Map + - The task executes a nested task for every input provided when the `for_each_task` field is present. See [_](#jobsnametasksfor_each_task). + + * - `health` + - Map + - An optional set of health rules that can be defined for this job. See [_](#jobsnametaskshealth). + + * - `job_cluster_key` + - String + - If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. + + * - `libraries` + - Sequence + - An optional list of libraries to be installed on the cluster. The default value is an empty list. See [_](#jobsnametaskslibraries). + + * - `max_retries` + - Integer + - An optional maximum number of times to retry an unsuccessful run. 
A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. + + * - `min_retry_interval_millis` + - Integer + - An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. + + * - `new_cluster` + - Map + - If new_cluster, a description of a new cluster that is created for each run. See [_](#jobsnametasksnew_cluster). + + * - `notebook_task` + - Map + - The task runs a notebook when the `notebook_task` field is present. See [_](#jobsnametasksnotebook_task). + + * - `notification_settings` + - Map + - Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. See [_](#jobsnametasksnotification_settings). + + * - `pipeline_task` + - Map + - The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. See [_](#jobsnametaskspipeline_task). + + * - `python_wheel_task` + - Map + - The task runs a Python wheel when the `python_wheel_task` field is present. See [_](#jobsnametaskspython_wheel_task). + + * - `retry_on_timeout` + - Boolean + - An optional policy to specify whether to retry a job when it times out. The default behavior is to not retry on timeout. + + * - `run_if` + - String + - An optional value specifying the condition determining whether the task is run once its dependencies have been completed. * `ALL_SUCCESS`: All dependencies have executed and succeeded * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded * `NONE_FAILED`: None of the dependencies have failed and at least one was executed * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: All dependencies have failed + + * - `run_job_task` + - Map + - The task triggers another job when the `run_job_task` field is present. See [_](#jobsnametasksrun_job_task). + + * - `spark_jar_task` + - Map + - The task runs a JAR when the `spark_jar_task` field is present. See [_](#jobsnametasksspark_jar_task). + + * - `spark_python_task` + - Map + - The task runs a Python file when the `spark_python_task` field is present. See [_](#jobsnametasksspark_python_task). + + * - `spark_submit_task` + - Map + - (Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. `master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory` and `--executor-memory` to a smaller value to leave some room for off-heap usage. The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. See [_](#jobsnametasksspark_submit_task). + + * - `sql_task` + - Map + - The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. 
See [_](#jobsnametaskssql_task). + + * - `task_key` + - String + - A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset. + + * - `timeout_seconds` + - Integer + - An optional timeout applied to each run of this job task. A value of `0` means no timeout. + + * - `webhook_notifications` + - Map + - A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. See [_](#jobsnametaskswebhook_notifications). + + +### jobs.\.tasks.clean_rooms_notebook_task + +**`Type: Map`** + +The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook +when the `clean_rooms_notebook_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `clean_room_name` + - String + - The clean room that the notebook belongs to. + + * - `etag` + - String + - Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). It can be fetched by calling the :method:cleanroomassets/get API. + + * - `notebook_base_parameters` + - Map + - Base parameters to be used for the clean room notebook job. + + * - `notebook_name` + - String + - Name of the notebook being run. + + +### jobs.\.tasks.condition_task + +**`Type: Map`** + +The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. +The condition task does not require a cluster to execute and does not support retries or notifications. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `left` + - String + - The left operand of the condition task. Can be either a string value or a job state or parameter reference. + + * - `op` + - String + - * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. + + * - `right` + - String + - The right operand of the condition task. Can be either a string value or a job state or parameter reference. + + +### jobs.\.tasks.dbt_task + +**`Type: Map`** + +The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog` + - String + - Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. + + * - `commands` + - Sequence + - A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. 
+ + * - `profiles_directory` + - String + - Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. + + * - `project_directory` + - String + - Path to the project directory. Optional for Git sourced tasks, in which case if no value is provided, the root of the Git repository is used. + + * - `schema` + - String + - Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. + + * - `source` + - String + - Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud Git provider. + + * - `warehouse_id` + - String + - ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. + + +### jobs.\.tasks.depends_on + +**`Type: Sequence`** + +An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. +The key is `task_key`, and the value is the name assigned to the dependent task. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `outcome` + - String + - Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. + + * - `task_key` + - String + - The name of the task this task depends on. + + +### jobs.\.tasks.email_notifications + +**`Type: Map`** + +An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `no_alert_for_skipped_runs` + - Boolean + - If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. + + * - `on_duration_warning_threshold_exceeded` + - Sequence + - A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + + * - `on_failure` + - Sequence + - A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + + * - `on_start` + - Sequence + - A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. 
+ + * - `on_streaming_backlog_exceeded` + - Sequence + - A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. + + * - `on_success` + - Sequence + - A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + + +### jobs.\.tasks.for_each_task + +**`Type: Map`** + +The task executes a nested task for every input provided when the `for_each_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `concurrency` + - Integer + - An optional maximum allowed number of concurrent runs of the task. Set this value if you want to be able to execute multiple runs of the task concurrently. + + * - `inputs` + - String + - Array for task to iterate on. This can be a JSON string or a reference to an array parameter. + + * - `task` + - Map + - Configuration for the task that will be run for each element in the array + + +### jobs.\.tasks.health + +**`Type: Map`** + +An optional set of health rules that can be defined for this job. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `rules` + - Sequence + - See [_](#jobsnametaskshealthrules). + + +### jobs.\.tasks.health.rules + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `metric` + - String + - Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. * `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. + + * - `op` + - String + - Specifies the operator used to compare the health metric value with the specified threshold. + + * - `value` + - Integer + - Specifies the threshold value that the health metric should obey to satisfy the health rule. + + +### jobs.\.tasks.libraries + +**`Type: Sequence`** + +An optional list of libraries to be installed on the cluster. +The default value is an empty list. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `cran` + - Map + - Specification of a CRAN library to be installed as part of the library. See [_](#jobsnametaskslibrariescran). + + * - `egg` + - String + - Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above. + + * - `jar` + - String + - URI of the JAR library to install. 
Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. + + * - `maven` + - Map + - Specification of a maven library to be installed. For example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }`. See [_](#jobsnametaskslibrariesmaven). + + * - `pypi` + - Map + - Specification of a PyPi library to be installed. For example: `{ "package": "simplejson" }`. See [_](#jobsnametaskslibrariespypi). + + * - `requirements` + - String + - URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` + + * - `whl` + - String + - URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or `{ "whl": "s3://my-bucket/library.whl" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. + + +### jobs.\.tasks.libraries.cran + +**`Type: Map`** + +Specification of a CRAN library to be installed as part of the library + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `package` + - String + - The name of the CRAN package to install. + + * - `repo` + - String + - The repository where the package can be found. If not specified, the default CRAN repo is used. + + +### jobs.\.tasks.libraries.maven + +**`Type: Map`** + +Specification of a maven library to be installed. For example: +`{ "coordinates": "org.jsoup:jsoup:1.7.2" }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `coordinates` + - String + - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + + * - `exclusions` + - Sequence + - List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + + * - `repo` + - String + - Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. + + +### jobs.\.tasks.libraries.pypi + +**`Type: Map`** + +Specification of a PyPi library to be installed. For example: +`{ "package": "simplejson" }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `package` + - String + - The name of the pypi package to install. An optional exact version specification is also supported. Examples: "simplejson" and "simplejson==3.8.0". + + * - `repo` + - String + - The repository where the package can be found. If not specified, the default pip index is used. + + +### jobs.\.tasks.new_cluster + +**`Type: Map`** + +If new_cluster, a description of a new cluster that is created for each run. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apply_policy_default_values` + - Boolean + - When set to true, fixed and default values from the policy will be used for fields that are omitted. 
When set to false, only fixed values from the policy will be applied. + + * - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [_](#jobsnametasksnew_clusterautoscale). + + * - `autotermination_minutes` + - Integer + - Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. + + * - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. See [_](#jobsnametasksnew_clusteraws_attributes). + + * - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [_](#jobsnametasksnew_clusterazure_attributes). + + * - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. See [_](#jobsnametasksnew_clustercluster_log_conf). + + * - `cluster_name` + - String + - Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. + + * - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - `data_security_mode` + - String + - Data security mode decides what data governance model to use when accessing data from a cluster. The following modes can only be used with `kind`. * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration. * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`. * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`. The following modes can be used regardless of `kind`. * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited. The following modes are deprecated starting with Databricks Runtime 15.0 and will be removed for future Databricks Runtime versions: * `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. 
* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. + + * - `docker_image` + - Map + - See [_](#jobsnametasksnew_clusterdocker_image). + + * - `driver_instance_pool_id` + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - `enable_elastic_disk` + - Boolean + - Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. + + * - `enable_local_disk_encryption` + - Boolean + - Whether to enable LUKS on cluster VMs' local disks + + * - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [_](#jobsnametasksnew_clustergcp_attributes). + + * - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [_](#jobsnametasksnew_clusterinit_scripts). + + * - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - `is_single_node` + - Boolean + - This field can only be used with `kind`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` + + * - `kind` + - String + - + + * - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. + + * - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - `runtime_engine` + - String + - Determines the cluster's runtime engine, either standard or Photon. This field is not compatible with legacy `spark_version` values that contain `-photon-`. Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. 
If left unspecified, the runtime engine defaults to standard unless the spark_version contains -photon-, in which case Photon will be used. + + * - `single_user_name` + - String + - Single user name if data_security_mode is `SINGLE_USER` + + * - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + + * - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - `spark_version` + - String + - The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. + + * - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + * - `use_ml_runtime` + - Boolean + - This field can only be used with `kind`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. + + * - `workload_type` + - Map + - See [_](#jobsnametasksnew_clusterworkload_type). + + +### jobs.\.tasks.new_cluster.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. + + * - `min_workers` + - Integer + - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. + + +### jobs.\.tasks.new_cluster.aws_attributes + +**`Type: Map`** + +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + + * - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. 
For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_type` + - String + - The type of EBS volumes that will be launched with this cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `instance_profile_arn` + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. + + * - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - `zone_id` + - String + - Identifier for the availability zone/datacenter in which the cluster resides. 
This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + + +### jobs.\.tasks.new_cluster.azure_attributes + +**`Type: Map`** + +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [_](#jobsnametasksnew_clusterazure_attributeslog_analytics_info). + + * - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + + +### jobs.\.tasks.new_cluster.azure_attributes.log_analytics_info + +**`Type: Map`** + +Defines values necessary to configure and run Azure Log Analytics agent + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `log_analytics_primary_key` + - String + - + + * - `log_analytics_workspace_id` + - String + - + + +### jobs.\.tasks.new_cluster.cluster_log_conf + +**`Type: Map`** + +The configuration for delivering spark logs to a long-term storage destination. +Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#jobsnametasksnew_clustercluster_log_confdbfs). 
+ + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#jobsnametasksnew_clustercluster_log_confs3). + + +### jobs.\.tasks.new_cluster.cluster_log_conf.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### jobs.\.tasks.new_cluster.cluster_log_conf.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### jobs.\.tasks.new_cluster.docker_image + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `basic_auth` + - Map + - See [_](#jobsnametasksnew_clusterdocker_imagebasic_auth). + + * - `url` + - String + - URL of the docker image. + + +### jobs.\.tasks.new_cluster.docker_image.basic_auth + +**`Type: Map`** + + + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `password` + - String + - Password of the user + + * - `username` + - String + - Name of the user + + +### jobs.\.tasks.new_cluster.gcp_attributes + +**`Type: Map`** + +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + + * - `boot_disk_size` + - Integer + - boot disk size in GB + + * - `google_service_account` + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - `use_preemptible_executors` + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + + * - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + + +### jobs.\.tasks.new_cluster.init_scripts + +**`Type: Sequence`** + +The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `abfss` + - Map + - destination needs to be provided. e.g. `{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } }. See [_](#jobsnametasksnew_clusterinit_scriptsabfss). + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#jobsnametasksnew_clusterinit_scriptsdbfs). + + * - `file` + - Map + - destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } }`. See [_](#jobsnametasksnew_clusterinit_scriptsfile). + + * - `gcs` + - Map + - destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [_](#jobsnametasksnew_clusterinit_scriptsgcs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. 
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#jobsnametasksnew_clusterinit_scriptss3). + + * - `volumes` + - Map + - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [_](#jobsnametasksnew_clusterinit_scriptsvolumes). + + * - `workspace` + - Map + - destination needs to be provided. e.g. `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`. See [_](#jobsnametasksnew_clusterinit_scriptsworkspace). + + +### jobs.\.tasks.new_cluster.init_scripts.abfss + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } } + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + + +### jobs.\.tasks.new_cluster.init_scripts.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### jobs.\.tasks.new_cluster.init_scripts.file + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "file" : { "destination" : "file:/my/local/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - local file destination, e.g. `file:/my/local/file.sh` + + +### jobs.\.tasks.new_cluster.init_scripts.gcs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + + +### jobs.\.tasks.new_cluster.init_scripts.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. 
+ + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) KMS key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### jobs.\.tasks.new_cluster.init_scripts.volumes + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` + + +### jobs.\.tasks.new_cluster.init_scripts.workspace + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + + +### jobs.\.tasks.new_cluster.workload_type + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `clients` + - Map + - Defines what type of clients can use the cluster. E.g. Notebooks, Jobs. See [_](#jobsnametasksnew_clusterworkload_typeclients). + + +### jobs.\.tasks.new_cluster.workload_type.clients + +**`Type: Map`** + +Defines what type of clients can use the cluster. E.g. Notebooks, Jobs + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jobs` + - Boolean + - With jobs set, the cluster can be used for jobs + + * - `notebooks` + - Boolean + - With notebooks set, this cluster can be used for notebooks + + +### jobs.\.tasks.notebook_task + +**`Type: Map`** + +The task runs a notebook when the `notebook_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `base_parameters` + - Map + - Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/runNow with parameters specified, the two parameters maps are merged. If the same key is specified in `base_parameters` and in `run-now`, the value from `run-now` is used. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, the default value from the notebook is used. Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). The JSON representation of this field cannot exceed 1MB. + + * - `notebook_path` + - String + - The path of the notebook to be run in the Databricks workspace or remote repository. For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. For notebooks stored in a remote repository, the path must be relative. This field is required. + + * - `source` + - String + - Optional location type of the notebook.
When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Notebook is located in Databricks workspace. * `GIT`: Notebook is located in cloud Git provider. + + * - `warehouse_id` + - String + - Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported; please use serverless or pro SQL warehouses. Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. + + +### jobs.\.tasks.notification_settings + +**`Type: Map`** + +Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alert_on_last_attempt` + - Boolean + - If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. + + * - `no_alert_for_canceled_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + + * - `no_alert_for_skipped_runs` + - Boolean + - If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + + +### jobs.\.tasks.pipeline_task + +**`Type: Map`** + +The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `full_refresh` + - Boolean + - If true, triggers a full refresh on the delta live table. + + * - `pipeline_id` + - String + - The full name of the pipeline task to execute. + + +### jobs.\.tasks.python_wheel_task + +**`Type: Map`** + +The task runs a Python wheel when the `python_wheel_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `entry_point` + - String + - Named entry point to use. If it does not exist in the metadata of the package, it executes the function from the package directly using `$packageName.$entryPoint()` + + * - `named_parameters` + - Map + - Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + + * - `package_name` + - String + - Name of the package to execute + + * - `parameters` + - Sequence + - Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. + + +### jobs.\.tasks.run_job_task + +**`Type: Map`** + +The task triggers another job when the `run_job_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dbt_commands` + - Sequence + - An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt run"]` + + * - `jar_params` + - Sequence + - A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. The parameters are used to invoke the main function of the main class specified in the Spark JAR task. If not specified upon `run-now`, it defaults to an empty list.
jar_params cannot be specified in conjunction with notebook_params. The JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + + * - `job_id` + - Integer + - ID of the job to trigger. + + * - `job_parameters` + - Map + - Job-level parameters used to trigger the job. + + * - `notebook_params` + - Map + - A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function. If not specified upon `run-now`, the triggered run uses the job’s base parameters. notebook_params cannot be specified in conjunction with jar_params. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. The JSON representation of this field (for example `{"notebook_params":{"name":"john doe","age":"35"}}`) cannot exceed 10,000 bytes. + + * - `pipeline_params` + - Map + - Controls whether the pipeline should perform a full refresh. See [_](#jobsnametasksrun_job_taskpipeline_params). + + * - `python_named_params` + - Map + - + + * - `python_params` + - Sequence + - A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. Important These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. + + * - `spark_submit_params` + - Sequence + - A list of parameters for jobs with spark submit task, for example `"spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs Important These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. + + * - `sql_params` + - Map + - A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", "age": "35"}`. The SQL alert task does not support custom parameters. + + +### jobs.\.tasks.run_job_task.pipeline_params + +**`Type: Map`** + +Controls whether the pipeline should perform a full refresh + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `full_refresh` + - Boolean + - If true, triggers a full refresh on the delta live table. 
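+
+**Example**
+
+The following sketch shows a task that triggers another job with `run_job_task` and asks the downstream pipeline to perform a full refresh. The job ID, task key, and parameter values are placeholders:
+
+```yaml
+resources:
+  jobs:
+    orchestrator:
+      name: orchestrator
+      tasks:
+        - task_key: trigger_downstream_job
+          run_job_task:
+            job_id: 123456
+            job_parameters:
+              env: dev
+            pipeline_params:
+              full_refresh: true
+```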
+ + +### jobs.\.tasks.spark_jar_task + +**`Type: Map`** + +The task runs a JAR when the `spark_jar_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `jar_uri` + - String + - Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create. + + * - `main_class_name` + - String + - The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. + + * - `parameters` + - Sequence + - Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + + * - `run_as_repl` + - Boolean + - Deprecated. A value of `false` is no longer supported. + + +### jobs.\.tasks.spark_python_task + +**`Type: Map`** + +The task runs a Python file when the `spark_python_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `parameters` + - Sequence + - Command line parameters passed to the Python file. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + + * - `python_file` + - String + - The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + + * - `source` + - String + - Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, the Python file will be retrieved from a Git repository defined in `git_source`. * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. * `GIT`: The Python file is located in a remote Git repository. + + +### jobs.\.tasks.spark_submit_task + +**`Type: Map`** + +(Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. + +In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. + +`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters. + +By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. + +The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `parameters` + - Sequence + - Command-line parameters passed to spark submit. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. 
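+
+**Example**
+
+The following sketch runs a Python file with `spark_python_task`. The cluster ID, workspace path, and parameters are placeholders; the task could equally reference a `new_cluster` or a job cluster instead of an existing cluster:
+
+```yaml
+resources:
+  jobs:
+    python_file_job:
+      name: python-file-job
+      tasks:
+        - task_key: run_script
+          existing_cluster_id: 1234-567890-abcde123
+          spark_python_task:
+            python_file: /Workspace/Users/someone@example.com/scripts/etl.py
+            parameters:
+              - "--env"
+              - "dev"
+```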
+ + +### jobs.\.tasks.sql_task + +**`Type: Map`** + +The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alert` + - Map + - If alert, indicates that this job must refresh a SQL alert. See [_](#jobsnametaskssql_taskalert). + + * - `dashboard` + - Map + - If dashboard, indicates that this job must refresh a SQL dashboard. See [_](#jobsnametaskssql_taskdashboard). + + * - `file` + - Map + - If file, indicates that this job runs a SQL file in a remote Git repository. See [_](#jobsnametaskssql_taskfile). + + * - `parameters` + - Map + - Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. + + * - `query` + - Map + - If query, indicates that this job must execute a SQL query. See [_](#jobsnametaskssql_taskquery). + + * - `warehouse_id` + - String + - The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. + + +### jobs.\.tasks.sql_task.alert + +**`Type: Map`** + +If alert, indicates that this job must refresh a SQL alert. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alert_id` + - String + - The canonical identifier of the SQL alert. + + * - `pause_subscriptions` + - Boolean + - If true, the alert notifications are not sent to subscribers. + + * - `subscriptions` + - Sequence + - If specified, alert notifications are sent to subscribers. See [_](#jobsnametaskssql_taskalertsubscriptions). + + +### jobs.\.tasks.sql_task.alert.subscriptions + +**`Type: Sequence`** + +If specified, alert notifications are sent to subscribers. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination_id` + - String + - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + + * - `user_name` + - String + - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + + +### jobs.\.tasks.sql_task.dashboard + +**`Type: Map`** + +If dashboard, indicates that this job must refresh a SQL dashboard. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `custom_subject` + - String + - Subject of the email sent to subscribers of this task. + + * - `dashboard_id` + - String + - The canonical identifier of the SQL dashboard. + + * - `pause_subscriptions` + - Boolean + - If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + + * - `subscriptions` + - Sequence + - If specified, dashboard snapshots are sent to subscriptions. See [_](#jobsnametaskssql_taskdashboardsubscriptions). + + +### jobs.\.tasks.sql_task.dashboard.subscriptions + +**`Type: Sequence`** + +If specified, dashboard snapshots are sent to subscriptions. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination_id` + - String + - The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. 
You cannot set both destination_id and user_name for subscription notifications. + + * - `user_name` + - String + - The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. + + +### jobs.\.tasks.sql_task.file + +**`Type: Map`** + +If file, indicates that this job runs a SQL file in a remote Git repository. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `path` + - String + - Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. + + * - `source` + - String + - Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. + + +### jobs.\.tasks.sql_task.query + +**`Type: Map`** + +If query, indicates that this job must execute a SQL query. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `query_id` + - String + - The canonical identifier of the SQL query. + + +### jobs.\.tasks.webhook_notifications + +**`Type: Map`** + +A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `on_duration_warning_threshold_exceeded` + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [_](#jobsnametaskswebhook_notificationson_duration_warning_threshold_exceeded). + + * - `on_failure` + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [_](#jobsnametaskswebhook_notificationson_failure). + + * - `on_start` + - Sequence + - An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. See [_](#jobsnametaskswebhook_notificationson_start). + + * - `on_streaming_backlog_exceeded` + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [_](#jobsnametaskswebhook_notificationson_streaming_backlog_exceeded). + + * - `on_success` + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [_](#jobsnametaskswebhook_notificationson_success). 
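+
+**Example**
+
+The following sketch attaches webhook notifications to a task. The notification destinations must already exist in the workspace; the IDs and paths shown are placeholders:
+
+```yaml
+resources:
+  jobs:
+    etl_job:
+      name: etl-job
+      tasks:
+        - task_key: etl
+          notebook_task:
+            notebook_path: /Workspace/Users/someone@example.com/etl
+          webhook_notifications:
+            on_failure:
+              - id: 0481e838-0a59-4eff-9541-a4ca6f149574
+            on_success:
+              - id: 8d2754ee-0f0b-4f3a-a1f4-a8202f7a62dd
+```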
+ + +### jobs.\.tasks.webhook_notifications.on_duration_warning_threshold_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.tasks.webhook_notifications.on_failure + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.tasks.webhook_notifications.on_start + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.tasks.webhook_notifications.on_streaming_backlog_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. +Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. +Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. +A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.tasks.webhook_notifications.on_success + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.trigger + +**`Type: Map`** + +A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `file_arrival` + - Map + - File arrival trigger settings. See [_](#jobsnametriggerfile_arrival). + + * - `pause_status` + - String + - Whether this trigger is paused or not. + + * - `periodic` + - Map + - Periodic trigger settings. See [_](#jobsnametriggerperiodic). + + * - `table` + - Map + - Old table trigger settings name. Deprecated in favor of `table_update`. See [_](#jobsnametriggertable). + + * - `table_update` + - Map + - See [_](#jobsnametriggertable_update). + + +### jobs.\.trigger.file_arrival + +**`Type: Map`** + +File arrival trigger settings. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `min_time_between_triggers_seconds` + - Integer + - If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds + + * - `url` + - String + - URL to be monitored for file arrivals. 
The path must point to the root or a subpath of the external location. + + * - `wait_after_last_change_seconds` + - Integer + - If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. + + +### jobs.\.trigger.periodic + +**`Type: Map`** + +Periodic trigger settings. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `interval` + - Integer + - The interval at which the trigger should run. + + * - `unit` + - String + - The unit of time for the interval. + + +### jobs.\.trigger.table + +**`Type: Map`** + +Old table trigger settings name. Deprecated in favor of `table_update`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `condition` + - String + - The table(s) condition based on which to trigger a job run. + + * - `min_time_between_triggers_seconds` + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - `table_names` + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - `wait_after_last_change_seconds` + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + +### jobs.\.trigger.table_update + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `condition` + - String + - The table(s) condition based on which to trigger a job run. + + * - `min_time_between_triggers_seconds` + - Integer + - If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. + + * - `table_names` + - Sequence + - A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + + * - `wait_after_last_change_seconds` + - Integer + - If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. + + +### jobs.\.webhook_notifications + +**`Type: Map`** + +A collection of system notification IDs to notify when runs of this job begin or complete. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `on_duration_warning_threshold_exceeded` + - Sequence + - An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. See [_](#jobsnamewebhook_notificationson_duration_warning_threshold_exceeded). + + * - `on_failure` + - Sequence + - An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. See [_](#jobsnamewebhook_notificationson_failure). + + * - `on_start` + - Sequence + - An optional list of system notification IDs to call when the run starts. 
A maximum of 3 destinations can be specified for the `on_start` property. See [_](#jobsnamewebhook_notificationson_start). + + * - `on_streaming_backlog_exceeded` + - Sequence + - An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. See [_](#jobsnamewebhook_notificationson_streaming_backlog_exceeded). + + * - `on_success` + - Sequence + - An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. See [_](#jobsnamewebhook_notificationson_success). + + +### jobs.\.webhook_notifications.on_duration_warning_threshold_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.webhook_notifications.on_failure + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.webhook_notifications.on_start + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.webhook_notifications.on_streaming_backlog_exceeded + +**`Type: Sequence`** + +An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. +Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. +Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. +A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +### jobs.\.webhook_notifications.on_success + +**`Type: Sequence`** + +An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `id` + - String + - + + +## model_serving_endpoints + +**`Type: Map`** + +The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). 
+ +```yaml +model_serving_endpoints: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `ai_gateway` + - Map + - The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned throughput endpoints are currently supported. See [_](#model_serving_endpointsnameai_gateway). + + * - `config` + - Map + - The core config of the serving endpoint. See [_](#model_serving_endpointsnameconfig). + + * - `name` + - String + - The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. + + * - `permissions` + - Sequence + - See [_](#model_serving_endpointsnamepermissions). + + * - `rate_limits` + - Sequence + - Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. See [_](#model_serving_endpointsnamerate_limits). + + * - `route_optimized` + - Boolean + - Enable route optimization for the serving endpoint. + + * - `tags` + - Sequence + - Tags to be attached to the serving endpoint and automatically propagated to billing logs. See [_](#model_serving_endpointsnametags). + + +**Example** + +The following example defines a model serving endpoint: + +```yaml +resources: + model_serving_endpoints: + uc_model_serving_endpoint: + name: "uc-model-endpoint" + config: + served_entities: + - entity_name: "myCatalog.mySchema.my-ads-model" + entity_version: "10" + workload_size: "Small" + scale_to_zero_enabled: "true" + traffic_config: + routes: + - served_model_name: "my-ads-model-10" + traffic_percentage: "100" + tags: + - key: "team" + value: "data science" +``` + +### model_serving_endpoints.\.ai_gateway + +**`Type: Map`** + +The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned throughput endpoints are currently supported. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `guardrails` + - Map + - Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. See [_](#model_serving_endpointsnameai_gatewayguardrails). + + * - `inference_table_config` + - Map + - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. See [_](#model_serving_endpointsnameai_gatewayinference_table_config). + + * - `rate_limits` + - Sequence + - Configuration for rate limits which can be set to limit endpoint traffic. See [_](#model_serving_endpointsnameai_gatewayrate_limits). + + * - `usage_tracking_config` + - Map + - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. See [_](#model_serving_endpointsnameai_gatewayusage_tracking_config). + + +### model_serving_endpoints.\.ai_gateway.guardrails + +**`Type: Map`** + +Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `input` + - Map + - Configuration for input guardrail filters. See [_](#model_serving_endpointsnameai_gatewayguardrailsinput). + + * - `output` + - Map + - Configuration for output guardrail filters. See [_](#model_serving_endpointsnameai_gatewayguardrailsoutput). 
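+
+**Example**
+
+The following sketch adds an AI Gateway block with input guardrails, per-user rate limits, and usage tracking to an external model endpoint. The model name, secret scope and key, guardrail values, and rate limits are illustrative placeholders:
+
+```yaml
+resources:
+  model_serving_endpoints:
+    guarded_endpoint:
+      name: guarded-endpoint
+      config:
+        served_entities:
+          - name: external-chat-model
+            external_model:
+              name: gpt-4o-mini
+              provider: openai
+              task: llm/v1/chat
+              openai_config:
+                openai_api_key: "{{secrets/my_scope/openai_api_key}}"
+      ai_gateway:
+        guardrails:
+          input:
+            safety: true
+            invalid_keywords:
+              - confidential
+        rate_limits:
+          - calls: 100
+            key: user
+            renewal_period: minute
+        usage_tracking_config:
+          enabled: true
+```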
+ + +### model_serving_endpoints.\.ai_gateway.guardrails.input + +**`Type: Map`** + +Configuration for input guardrail filters. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `invalid_keywords` + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - `pii` + - Map + - Configuration for guardrail PII filter. See [_](#model_serving_endpointsnameai_gatewayguardrailsinputpii). + + * - `safety` + - Boolean + - Indicates whether the safety filter is enabled. + + * - `valid_topics` + - Sequence + - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + +### model_serving_endpoints.\.ai_gateway.guardrails.input.pii + +**`Type: Map`** + +Configuration for guardrail PII filter. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `behavior` + - String + - Configuration for input guardrail filters. + + +### model_serving_endpoints.\.ai_gateway.guardrails.output + +**`Type: Map`** + +Configuration for output guardrail filters. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `invalid_keywords` + - Sequence + - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + + * - `pii` + - Map + - Configuration for guardrail PII filter. See [_](#model_serving_endpointsnameai_gatewayguardrailsoutputpii). + + * - `safety` + - Boolean + - Indicates whether the safety filter is enabled. + + * - `valid_topics` + - Sequence + - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + + +### model_serving_endpoints.\.ai_gateway.guardrails.output.pii + +**`Type: Map`** + +Configuration for guardrail PII filter. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `behavior` + - String + - Configuration for input guardrail filters. + + +### model_serving_endpoints.\.ai_gateway.inference_table_config + +**`Type: Map`** + +Configuration for payload logging using inference tables. +Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog_name` + - String + - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + + * - `enabled` + - Boolean + - Indicates whether the inference table is enabled. + + * - `schema_name` + - String + - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + + * - `table_name_prefix` + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. + + +### model_serving_endpoints.\.ai_gateway.rate_limits + +**`Type: Sequence`** + +Configuration for rate limits which can be set to limit endpoint traffic. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `calls` + - Integer + - Used to specify how many calls are allowed for a key within the renewal_period. + + * - `key` + - String + - Key field for a rate limit. 
Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + + * - `renewal_period` + - String + - Renewal period field for a rate limit. Currently, only 'minute' is supported. + + +### model_serving_endpoints.\.ai_gateway.usage_tracking_config + +**`Type: Map`** + +Configuration to enable usage tracking using system tables. +These tables allow you to monitor operational usage on endpoints and their associated costs. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - Whether to enable usage tracking. + + +### model_serving_endpoints.\.config + +**`Type: Map`** + +The core config of the serving endpoint. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `auto_capture_config` + - Map + - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. Note: this field is deprecated for creating new provisioned throughput endpoints, or updating existing provisioned throughput endpoints that never have inference table configured; in these cases please use AI Gateway to manage inference tables. See [_](#model_serving_endpointsnameconfigauto_capture_config). + + * - `served_entities` + - Sequence + - The list of served entities under the serving endpoint config. See [_](#model_serving_endpointsnameconfigserved_entities). + + * - `served_models` + - Sequence + - (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. See [_](#model_serving_endpointsnameconfigserved_models). + + * - `traffic_config` + - Map + - The traffic configuration associated with the serving endpoint config. See [_](#model_serving_endpointsnameconfigtraffic_config). + + +### model_serving_endpoints.\.config.auto_capture_config + +**`Type: Map`** + +Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. +Note: this field is deprecated for creating new provisioned throughput endpoints, +or updating existing provisioned throughput endpoints that never have inference table configured; +in these cases please use AI Gateway to manage inference tables. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog_name` + - String + - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. + + * - `enabled` + - Boolean + - Indicates whether the inference table is enabled. + + * - `schema_name` + - String + - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. + + * - `table_name_prefix` + - String + - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. + + +### model_serving_endpoints.\.config.served_entities + +**`Type: Sequence`** + +The list of served entities under the serving endpoint config. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `entity_name` + - String + - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. 
+ + * - `entity_version` + - String + - + + * - `environment_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` + + * - `external_model` + - Map + - The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_model). + + * - `instance_profile_arn` + - String + - ARN of the instance profile that the served entity uses to access AWS resources. + + * - `max_provisioned_throughput` + - Integer + - The maximum tokens per second that the endpoint can scale up to. + + * - `min_provisioned_throughput` + - Integer + - The minimum tokens per second that the endpoint can scale down to. + + * - `name` + - String + - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. + + * - `scale_to_zero_enabled` + - Boolean + - Whether the compute resources for the served entity should scale down to zero. + + * - `workload_size` + - String + - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. + + * - `workload_type` + - String + - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + + +### model_serving_endpoints.\.config.served_entities.external_model + +**`Type: Map`** + +The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. 
The task type of all external models within an endpoint must be the same. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `ai21labs_config` + - Map + - AI21Labs Config. Only required if the provider is 'ai21labs'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelai21labs_config). + + * - `amazon_bedrock_config` + - Map + - Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelamazon_bedrock_config). + + * - `anthropic_config` + - Map + - Anthropic Config. Only required if the provider is 'anthropic'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelanthropic_config). + + * - `cohere_config` + - Map + - Cohere Config. Only required if the provider is 'cohere'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelcohere_config). + + * - `databricks_model_serving_config` + - Map + - Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modeldatabricks_model_serving_config). + + * - `google_cloud_vertex_ai_config` + - Map + - Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelgoogle_cloud_vertex_ai_config). + + * - `name` + - String + - The name of the external model. + + * - `openai_config` + - Map + - OpenAI Config. Only required if the provider is 'openai'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelopenai_config). + + * - `palm_config` + - Map + - PaLM Config. Only required if the provider is 'palm'. See [_](#model_serving_endpointsnameconfigserved_entitiesexternal_modelpalm_config). + + * - `provider` + - String + - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'. + + * - `task` + - String + - The task type of the external model. + + +### model_serving_endpoints.\.config.served_entities.external_model.ai21labs_config + +**`Type: Map`** + +AI21Labs Config. Only required if the provider is 'ai21labs'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `ai21labs_api_key` + - String + - The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + + * - `ai21labs_api_key_plaintext` + - String + - An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + + +### model_serving_endpoints.\.config.served_entities.external_model.amazon_bedrock_config + +**`Type: Map`** + +Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `aws_access_key_id` + - String + - The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id_plaintext`. 
You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + + * - `aws_access_key_id_plaintext` + - String + - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + + * - `aws_region` + - String + - The AWS region to use. Bedrock has to be enabled there. + + * - `aws_secret_access_key` + - String + - The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + + * - `aws_secret_access_key_plaintext` + - String + - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + + * - `bedrock_provider` + - String + - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. + + +### model_serving_endpoints.\.config.served_entities.external_model.anthropic_config + +**`Type: Map`** + +Anthropic Config. Only required if the provider is 'anthropic'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `anthropic_api_key` + - String + - The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + + * - `anthropic_api_key_plaintext` + - String + - The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + + +### model_serving_endpoints.\.config.served_entities.external_model.cohere_config + +**`Type: Map`** + +Cohere Config. Only required if the provider is 'cohere'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `cohere_api_base` + - String + - This is an optional field to provide a customized base URL for the Cohere API. If left unspecified, the standard Cohere base URL is used. + + * - `cohere_api_key` + - String + - The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + + * - `cohere_api_key_plaintext` + - String + - The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. 
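+
+**Example**
+
+The following sketch serves an external model hosted on Amazon Bedrock. The model identifier, region, and secret scope and key names are placeholders, and the AWS credentials are referenced from Databricks secrets rather than provided in plaintext:
+
+```yaml
+resources:
+  model_serving_endpoints:
+    bedrock_endpoint:
+      name: bedrock-claude-endpoint
+      config:
+        served_entities:
+          - name: bedrock-claude
+            external_model:
+              name: "anthropic.claude-3-5-sonnet-20240620-v1:0"
+              provider: amazon-bedrock
+              task: llm/v1/chat
+              amazon_bedrock_config:
+                aws_region: us-east-1
+                aws_access_key_id: "{{secrets/my_scope/aws_access_key_id}}"
+                aws_secret_access_key: "{{secrets/my_scope/aws_secret_access_key}}"
+                bedrock_provider: anthropic
+```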
+ + +### model_serving_endpoints.\.config.served_entities.external_model.databricks_model_serving_config + +**`Type: Map`** + +Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `databricks_api_token` + - String + - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model. If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + + * - `databricks_api_token_plaintext` + - String + - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + + * - `databricks_workspace_url` + - String + - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. + + +### model_serving_endpoints.\.config.served_entities.external_model.google_cloud_vertex_ai_config + +**`Type: Map`** + +Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `private_key` + - String + - The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys]. If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + + * - `private_key_plaintext` + - String + - The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys]. If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys + + * - `project_id` + - String + - This is the Google Cloud project id that the service account is associated with. + + * - `region` + - String + - This is the region for the Google Cloud Vertex AI Service. See [supported regions] for more details. Some models are only available in specific regions. [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations + + +### model_serving_endpoints.\.config.served_entities.external_model.openai_config + +**`Type: Map`** + +OpenAI Config. Only required if the provider is 'openai'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `microsoft_entra_client_id` + - String + - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. 
+ + * - `microsoft_entra_client_secret` + - String + - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + + * - `microsoft_entra_client_secret_plaintext` + - String + - The client secret used for Microsoft Entra ID authentication provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + + * - `microsoft_entra_tenant_id` + - String + - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. + + * - `openai_api_base` + - String + - This is a field to provide a customized base URL for the OpenAI API. For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service provided by Azure. For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. + + * - `openai_api_key` + - String + - The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + + * - `openai_api_key_plaintext` + - String + - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + + * - `openai_api_type` + - String + - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required; set it to the preferred security access validation protocol. For access token validation, use azure. For authentication using Azure Active Directory (Azure AD), use azuread. + + * - `openai_api_version` + - String + - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to utilize, specified by a date. + + * - `openai_deployment_name` + - String + - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service. + + * - `openai_organization` + - String + - This is an optional field to specify the organization in OpenAI or Azure OpenAI. + + +### model_serving_endpoints.\.config.served_entities.external_model.palm_config + +**`Type: Map`** + +PaLM Config. Only required if the provider is 'palm'. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `palm_api_key` + - String + - The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + + * - `palm_api_key_plaintext` + - String + - The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. 
You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + + +### model_serving_endpoints.\.config.served_models + +**`Type: Sequence`** + +(Deprecated, use served_entities instead) The list of served models under the serving endpoint config. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `environment_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` + + * - `instance_profile_arn` + - String + - ARN of the instance profile that the served entity uses to access AWS resources. + + * - `max_provisioned_throughput` + - Integer + - The maximum tokens per second that the endpoint can scale up to. + + * - `min_provisioned_throughput` + - Integer + - The minimum tokens per second that the endpoint can scale down to. + + * - `model_name` + - String + - + + * - `model_version` + - String + - + + * - `name` + - String + - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. + + * - `scale_to_zero_enabled` + - Boolean + - Whether the compute resources for the served entity should scale down to zero. + + * - `workload_size` + - String + - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. + + * - `workload_type` + - String + - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + + +### model_serving_endpoints.\.config.traffic_config + +**`Type: Map`** + +The traffic configuration associated with the serving endpoint config. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `routes` + - Sequence + - The list of routes that define traffic to each served entity. See [_](#model_serving_endpointsnameconfigtraffic_configroutes). + + +### model_serving_endpoints.\.config.traffic_config.routes + +**`Type: Sequence`** + +The list of routes that define traffic to each served entity. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `served_model_name` + - String + - The name of the served model this route configures traffic for. + + * - `traffic_percentage` + - Integer + - The percentage of endpoint traffic to send to this route. 
It must be an integer between 0 and 100 inclusive. + + +### model_serving_endpoints.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### model_serving_endpoints.\.rate_limits + +**`Type: Sequence`** + +Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `calls` + - Integer + - Used to specify how many calls are allowed for a key within the renewal_period. + + * - `key` + - String + - Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + + * - `renewal_period` + - String + - Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. + + +### model_serving_endpoints.\.tags + +**`Type: Sequence`** + +Tags to be attached to the serving endpoint and automatically propagated to billing logs. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `key` + - String + - Key field for a serving endpoint tag. + + * - `value` + - String + - Optional value field for a serving endpoint tag. + + +## models + +**`Type: Map`** + +The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use [registered models](#registered-model) instead. + +```yaml +models: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `creation_timestamp` + - Integer + - Timestamp recorded when this `registered_model` was created. + + * - `description` + - String + - Description of this `registered_model`. + + * - `last_updated_timestamp` + - Integer + - Timestamp recorded when metadata for this `registered_model` was last updated. + + * - `latest_versions` + - Sequence + - Collection of latest model versions for each stage. Only contains models with current `READY` status. See [_](#modelsnamelatest_versions). + + * - `name` + - String + - Unique name for the model. + + * - `permissions` + - Sequence + - See [_](#modelsnamepermissions). + + * - `tags` + - Sequence + - Tags: Additional metadata key-value pairs for this `registered_model`. See [_](#modelsnametags). + + * - `user_id` + - String + - User that created this `registered_model` + + +### models.\.latest_versions + +**`Type: Sequence`** + +Collection of latest model versions for each stage. +Only contains models with current `READY` status. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `creation_timestamp` + - Integer + - Timestamp recorded when this `model_version` was created. + + * - `current_stage` + - String + - Current stage for this `model_version`. + + * - `description` + - String + - Description of this `model_version`. + + * - `last_updated_timestamp` + - Integer + - Timestamp recorded when metadata for this `model_version` was last updated. 
+ + * - `name` + - String + - Unique name of the model + + * - `run_id` + - String + - MLflow run ID used when creating `model_version`, if `source` was generated by an experiment run stored in MLflow tracking server. + + * - `run_link` + - String + - Run Link: Direct link to the run that generated this version + + * - `source` + - String + - URI indicating the location of the source model artifacts, used when creating `model_version` + + * - `status` + - String + - Current status of `model_version` + + * - `status_message` + - String + - Details on current `status`, if it is pending or failed. + + * - `tags` + - Sequence + - Tags: Additional metadata key-value pairs for this `model_version`. See [_](#modelsnamelatest_versionstags). + + * - `user_id` + - String + - User that created this `model_version`. + + * - `version` + - String + - Model's version number. + + +### models.\.latest_versions.tags + +**`Type: Sequence`** + +Tags: Additional metadata key-value pairs for this `model_version`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `key` + - String + - The tag key. + + * - `value` + - String + - The tag value. + + +### models.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### models.\.tags + +**`Type: Sequence`** + +Tags: Additional metadata key-value pairs for this `registered_model`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `key` + - String + - The tag key. + + * - `value` + - String + - The tag value. + + +## pipelines + +**`Type: Map`** + +The pipeline resource allows you to create [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + +```yaml +pipelines: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `allow_duplicate_names` + - Boolean + - If false, deployment will fail if name conflicts with that of another pipeline. + + * - `budget_policy_id` + - String + - Budget policy of this pipeline. + + * - `catalog` + - String + - A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + + * - `channel` + - String + - DLT Release Channel that specifies which version to use. + + * - `clusters` + - Sequence + - Cluster settings for this pipeline deployment. See [_](#pipelinesnameclusters). + + * - `configuration` + - Map + - String-String configuration for this pipeline execution. + + * - `continuous` + - Boolean + - Whether the pipeline is continuous or triggered. This replaces `trigger`. + + * - `deployment` + - Map + - Deployment type of this pipeline. See [_](#pipelinesnamedeployment). 
+ + * - `development` + - Boolean + - Whether the pipeline is in Development mode. Defaults to false. + + * - `dry_run` + - Boolean + - + + * - `edition` + - String + - Pipeline product edition. + + * - `filters` + - Map + - Filters on which Pipeline packages to include in the deployed graph. See [_](#pipelinesnamefilters). + + * - `gateway_definition` + - Map + - The definition of a gateway pipeline to support change data capture. See [_](#pipelinesnamegateway_definition). + + * - `id` + - String + - Unique identifier for this pipeline. + + * - `ingestion_definition` + - Map + - The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. See [_](#pipelinesnameingestion_definition). + + * - `libraries` + - Sequence + - Libraries or code needed by this deployment. See [_](#pipelinesnamelibraries). + + * - `name` + - String + - Friendly identifier for this pipeline. + + * - `notifications` + - Sequence + - List of notification settings for this pipeline. See [_](#pipelinesnamenotifications). + + * - `permissions` + - Sequence + - See [_](#pipelinesnamepermissions). + + * - `photon` + - Boolean + - Whether Photon is enabled for this pipeline. + + * - `restart_window` + - Map + - Restart window of this pipeline. See [_](#pipelinesnamerestart_window). + + * - `run_as` + - Map + - Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. See [_](#pipelinesnamerun_as). + + * - `schema` + - String + - The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. + + * - `serverless` + - Boolean + - Whether serverless compute is enabled for this pipeline. + + * - `storage` + - String + - DBFS root directory for storing checkpoints and tables. + + * - `target` + - String + - Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + + * - `trigger` + - Map + - Which pipeline trigger to use. Deprecated: Use `continuous` instead. See [_](#pipelinesnametrigger). + + +**Example** + +The following example defines a pipeline with the resource key `hello-pipeline`: + +```yaml +resources: + pipelines: + hello-pipeline: + name: hello-pipeline + clusters: + - label: default + num_workers: 1 + development: true + continuous: false + channel: CURRENT + edition: CORE + photon: false + libraries: + - notebook: + path: ./pipeline.py +``` + +### pipelines.\.clusters + +**`Type: Sequence`** + +Cluster settings for this pipeline deployment. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `apply_policy_default_values` + - Boolean + - Note: This field won't be persisted. Only API users will check this field. + + * - `autoscale` + - Map + - Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. See [_](#pipelinesnameclustersautoscale). + + * - `aws_attributes` + - Map + - Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. 
See [_](#pipelinesnameclustersaws_attributes). + + * - `azure_attributes` + - Map + - Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. See [_](#pipelinesnameclustersazure_attributes). + + * - `cluster_log_conf` + - Map + - The configuration for delivering spark logs to a long-term storage destination. Only dbfs destinations are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. . See [_](#pipelinesnameclusterscluster_log_conf). + + * - `custom_tags` + - Map + - Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + + * - `driver_instance_pool_id` + - String + - The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. + + * - `driver_node_type_id` + - String + - The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + * - `enable_local_disk_encryption` + - Boolean + - Whether to enable local disk encryption for the cluster. + + * - `gcp_attributes` + - Map + - Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. See [_](#pipelinesnameclustersgcp_attributes). + + * - `init_scripts` + - Sequence + - The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. See [_](#pipelinesnameclustersinit_scripts). + + * - `instance_pool_id` + - String + - The optional ID of the instance pool to which the cluster belongs. + + * - `label` + - String + - A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + + * - `node_type_id` + - String + - This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. + + * - `num_workers` + - Integer + - Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. Note: When reading the properties of a cluster, this field reflects the desired number of workers rather than the actual current number of workers. For instance, if a cluster is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. 
+ + * - `policy_id` + - String + - The ID of the cluster policy used to create the cluster if applicable. + + * - `spark_conf` + - Map + - An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. + + * - `spark_env_vars` + - Map + - An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all default databricks managed environmental variables are included as well. Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + + * - `ssh_public_keys` + - Sequence + - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. + + +### pipelines.\.clusters.autoscale + +**`Type: Map`** + +Parameters needed in order to automatically scale clusters up and down based on load. +Note: autoscaling works best with DB runtime versions 3.0 or later. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `max_workers` + - Integer + - The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. + + * - `min_workers` + - Integer + - The minimum number of workers the cluster can scale down to when underutilized. It is also the initial number of workers the cluster will have after creation. + + * - `mode` + - String + - Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. + + +### pipelines.\.clusters.aws_attributes + +**`Type: Map`** + +Attributes related to clusters running on Amazon Web Services. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. + + * - `ebs_volume_count` + - Integer + - The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogenously sized scratch devices can lead to inefficient disk utilization. 
If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. + + * - `ebs_volume_iops` + - Integer + - If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_size` + - Integer + - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. + + * - `ebs_volume_throughput` + - Integer + - If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + + * - `ebs_volume_type` + - String + - The type of EBS volumes that will be launched with this cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `instance_profile_arn` + - String + - Nodes for this cluster will only be placed on AWS instances with this instance profile. If omitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account administrator. This feature may only be available to certain customer plans. If this field is omitted, we will pull in the default from the conf if it exists. + + * - `spot_bid_price_percent` + - Integer + - The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot instance, then the bid price is half of the price of on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field will be considered. Note that, for safety, we enforce this field to be no more than 10000. The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + + * - `zone_id` + - String + - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. 
If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if there is not enough capacity. The list of available zones as well as the default value can be found by using the `List Zones` method. + + +### pipelines.\.clusters.azure_attributes + +**`Type: Map`** + +Attributes related to clusters running on Microsoft Azure. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. + + * - `first_on_demand` + - Integer + - The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. + + * - `log_analytics_info` + - Map + - Defines values necessary to configure and run Azure Log Analytics agent. See [_](#pipelinesnameclustersazure_attributeslog_analytics_info). + + * - `spot_bid_max_price` + - Any + - The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + + +### pipelines.\.clusters.azure_attributes.log_analytics_info + +**`Type: Map`** + +Defines values necessary to configure and run Azure Log Analytics agent + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `log_analytics_primary_key` + - String + - + + * - `log_analytics_workspace_id` + - String + - + + +### pipelines.\.clusters.cluster_log_conf + +**`Type: Map`** + +The configuration for delivering spark logs to a long-term storage destination. +Only dbfs destinations are supported. Only one destination can be specified +for one cluster. If the conf is given, the logs will be delivered to the destination every +`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while +the destination of executor logs is `$destination/$clusterId/executor`. + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#pipelinesnameclusterscluster_log_confdbfs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#pipelinesnameclusterscluster_log_confs3). 
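+
+For example, a pipeline cluster that delivers its logs to a DBFS location might be configured as follows. This is a minimal sketch: the resource key, pipeline name, and destination path are placeholders.
+
+```yaml
+resources:
+  pipelines:
+    my_pipeline:
+      name: my-pipeline
+      clusters:
+        - label: default
+          num_workers: 1
+          cluster_log_conf:
+            dbfs:
+              destination: dbfs:/home/cluster_log
+```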
+ + +### pipelines.\.clusters.cluster_log_conf.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. `dbfs:/my/path` + + +### pipelines.\.clusters.cluster_log_conf.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) KMS key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### pipelines.\.clusters.gcp_attributes + +**`Type: Map`** + +Attributes related to clusters running on Google Cloud Platform. +If not specified at cluster creation, a set of default values will be used. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `availability` + - String + - This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. + + * - `boot_disk_size` + - Integer + - boot disk size in GB + + * - `google_service_account` + - String + - If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. + + * - `local_ssd_count` + - Integer + - If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + + * - `use_preemptible_executors` + - Boolean + - This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. + + * - `zone_id` + - String + - Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. + + +### pipelines.\.clusters.init_scripts + +**`Type: Sequence`** + +The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `abfss` + - Map + - destination needs to be provided. e.g. `{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } }. See [_](#pipelinesnameclustersinit_scriptsabfss). + + * - `dbfs` + - Map + - destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`. See [_](#pipelinesnameclustersinit_scriptsdbfs). + + * - `file` + - Map + - destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } }`. See [_](#pipelinesnameclustersinit_scriptsfile). + + * - `gcs` + - Map + - destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`. See [_](#pipelinesnameclustersinit_scriptsgcs). + + * - `s3` + - Map + - destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. See [_](#pipelinesnameclustersinit_scriptss3). + + * - `volumes` + - Map + - destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`. See [_](#pipelinesnameclustersinit_scriptsvolumes). + + * - `workspace` + - Map + - destination needs to be provided. e.g. `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`. See [_](#pipelinesnameclustersinit_scriptsworkspace). + + +### pipelines.\.clusters.init_scripts.abfss + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } } + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + + +### pipelines.\.clusters.init_scripts.dbfs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - dbfs destination, e.g. 
`dbfs:/my/path` + + +### pipelines.\.clusters.init_scripts.file + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "file" : { "destination" : "file:/my/local/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - local file destination, e.g. `file:/my/local/file.sh` + + +### pipelines.\.clusters.init_scripts.gcs + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + + +### pipelines.\.clusters.init_scripts.s3 + +**`Type: Map`** + +destination and either the region or endpoint need to be provided. e.g. +`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` +Cluster iam role is used to access s3, please make sure the cluster iam role in +`instance_profile_arn` has permission to write data to the s3 destination. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `canned_acl` + - String + - (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. + + * - `destination` + - String + - S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. + + * - `enable_encryption` + - Boolean + - (Optional) Flag to enable server side encryption, `false` by default. + + * - `encryption_type` + - String + - (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. + + * - `endpoint` + - String + - S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + * - `kms_key` + - String + - (Optional) KMS key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + + * - `region` + - String + - S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. + + +### pipelines.\.clusters.init_scripts.volumes + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` + + +### pipelines.\.clusters.init_scripts.workspace + +**`Type: Map`** + +destination needs to be provided. e.g. +`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination` + - String + - workspace files destination, e.g. 
`/Users/user1@databricks.com/my-init.sh` + + +### pipelines.\.deployment + +**`Type: Map`** + +Deployment type of this pipeline. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `kind` + - String + - The deployment method that manages the pipeline. + + * - `metadata_file_path` + - String + - The path to the file containing metadata about the deployment. + + +### pipelines.\.filters + +**`Type: Map`** + +Filters on which Pipeline packages to include in the deployed graph. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `exclude` + - Sequence + - Paths to exclude. + + * - `include` + - Sequence + - Paths to include. + + +### pipelines.\.gateway_definition + +**`Type: Map`** + +The definition of a gateway pipeline to support change data capture. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `connection_id` + - String + - [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + + * - `connection_name` + - String + - Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + + * - `gateway_storage_catalog` + - String + - Required, Immutable. The name of the catalog for the gateway pipeline's storage location. + + * - `gateway_storage_name` + - String + - Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. The Delta Live Tables system will automatically create the storage location under the catalog and schema. + + * - `gateway_storage_schema` + - String + - Required, Immutable. The name of the schema for the gateway pipeline's storage location. + + +### pipelines.\.ingestion_definition + +**`Type: Map`** + +The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `connection_name` + - String + - Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. + + * - `ingestion_gateway_id` + - String + - Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. + + * - `objects` + - Sequence + - Required. Settings specifying tables to replicate and the destination for the replicated tables. See [_](#pipelinesnameingestion_definitionobjects). + + * - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. See [_](#pipelinesnameingestion_definitiontable_configuration). + + +### pipelines.\.ingestion_definition.objects + +**`Type: Sequence`** + +Required. Settings specifying tables to replicate and the destination for the replicated tables. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `report` + - Map + - Select a specific source report. See [_](#pipelinesnameingestion_definitionobjectsreport). + + * - `schema` + - Map + - Select all tables from a specific source schema. See [_](#pipelinesnameingestion_definitionobjectsschema). + + * - `table` + - Map + - Select a specific source table. 
See [_](#pipelinesnameingestion_definitionobjectstable). + + +### pipelines.\.ingestion_definition.objects.report + +**`Type: Map`** + +Select a specific source report. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination_catalog` + - String + - Required. Destination catalog to store table. + + * - `destination_schema` + - String + - Required. Destination schema to store table. + + * - `destination_table` + - String + - Required. Destination table name. The pipeline fails if a table with that name already exists. + + * - `source_url` + - String + - Required. Report URL in the source system. + + * - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. See [_](#pipelinesnameingestion_definitionobjectsreporttable_configuration). + + +### pipelines.\.ingestion_definition.objects.report.table_configuration + +**`Type: Map`** + +Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + + * - `salesforce_include_formula_fields` + - Boolean + - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector. + + * - `scd_type` + - String + - The SCD type to use to ingest the table. + + * - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + +### pipelines.\.ingestion_definition.objects.schema + +**`Type: Map`** + +Select all tables from a specific source schema. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination_catalog` + - String + - Required. Destination catalog to store tables. + + * - `destination_schema` + - String + - Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails if a table with the same name already exists. + + * - `source_catalog` + - String + - The source catalog name. Might be optional depending on the type of source. + + * - `source_schema` + - String + - Required. Schema name in the source database. + + * - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. See [_](#pipelinesnameingestion_definitionobjectsschematable_configuration). + + +### pipelines.\.ingestion_definition.objects.schema.table_configuration + +**`Type: Map`** + +Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + + * - `salesforce_include_formula_fields` + - Boolean + - If true, formula fields defined in the table are included in the ingestion. 
This setting is only valid for the Salesforce connector + + * - `scd_type` + - String + - The SCD type to use to ingest the table. + + * - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + +### pipelines.\.ingestion_definition.objects.table + +**`Type: Map`** + +Select a specific source table. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `destination_catalog` + - String + - Required. Destination catalog to store table. + + * - `destination_schema` + - String + - Required. Destination schema to store table. + + * - `destination_table` + - String + - Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. + + * - `source_catalog` + - String + - Source catalog name. Might be optional depending on the type of source. + + * - `source_schema` + - String + - Schema name in the source database. Might be optional depending on the type of source. + + * - `source_table` + - String + - Required. Table name in the source database. + + * - `table_configuration` + - Map + - Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. See [_](#pipelinesnameingestion_definitionobjectstabletable_configuration). + + +### pipelines.\.ingestion_definition.objects.table.table_configuration + +**`Type: Map`** + +Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + + * - `salesforce_include_formula_fields` + - Boolean + - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector + + * - `scd_type` + - String + - The SCD type to use to ingest the table. + + * - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + +### pipelines.\.ingestion_definition.table_configuration + +**`Type: Map`** + +Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `primary_keys` + - Sequence + - The primary key of the table used to apply changes. + + * - `salesforce_include_formula_fields` + - Boolean + - If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector + + * - `scd_type` + - String + - The SCD type to use to ingest the table. + + * - `sequence_by` + - Sequence + - The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. + + +### pipelines.\.libraries + +**`Type: Sequence`** + +Libraries or code needed by this deployment. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `file` + - Map + - The path to a file that defines a pipeline and is stored in the Databricks Repos. . 
See [_](#pipelinesnamelibrariesfile). + + * - `jar` + - String + - URI of the jar to be installed. Currently only DBFS is supported. + + * - `maven` + - Map + - Specification of a maven library to be installed. See [_](#pipelinesnamelibrariesmaven). + + * - `notebook` + - Map + - The path to a notebook that defines a pipeline and is stored in the Databricks workspace. See [_](#pipelinesnamelibrariesnotebook). + + * - `whl` + - String + - URI of the whl to be installed. + + +### pipelines.\.libraries.file + +**`Type: Map`** + +The path to a file that defines a pipeline and is stored in the Databricks Repos. + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `path` + - String + - The absolute path of the file. + + +### pipelines.\.libraries.maven + +**`Type: Map`** + +Specification of a maven library to be installed. + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `coordinates` + - String + - Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + + * - `exclusions` + - Sequence + - List of dependencies to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + + * - `repo` + - String + - Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. + + +### pipelines.\.libraries.notebook + +**`Type: Map`** + +The path to a notebook that defines a pipeline and is stored in the Databricks workspace. + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `path` + - String + - The absolute path of the notebook. + + +### pipelines.\.notifications + +**`Type: Sequence`** + +List of notification settings for this pipeline. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `alerts` + - Sequence + - A list of alerts that trigger the sending of notifications to the configured destinations. The supported alerts are: * `on-update-success`: A pipeline update completes successfully. * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. + + * - `email_recipients` + - Sequence + - A list of email addresses notified when a configured alert is triggered. + + +### pipelines.\.permissions + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `group_name` + - String + - The name of the group that has the permission set in level. + + * - `level` + - String + - The allowed permission for user, group, service principal defined for this permission. + + * - `service_principal_name` + - String + - The name of the service principal that has the permission set in level. + + * - `user_name` + - String + - The name of the user that has the permission set in level. + + +### pipelines.\.restart_window + +**`Type: Map`** + +Restart window of this pipeline. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `days_of_week` + - Sequence + - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified, all days of the week will be used. + + * - `start_hour` + - Integer + - An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. 
Continuous pipeline restart is triggered only within a five-hour window starting at this hour. + + * - `time_zone_id` + - String + - Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. + + +### pipelines.\.restart_window.days_of_week + +**`Type: Sequence`** + +Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). +If not specified all days of the week will be used. + + +### pipelines.\.run_as + +**`Type: Map`** + +Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. + +Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `service_principal_name` + - String + - Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + + * - `user_name` + - String + - The email of an active workspace user. Users can only set this field to their own email. + + +### pipelines.\.trigger + +**`Type: Map`** + +Which pipeline trigger to use. Deprecated: Use `continuous` instead. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `cron` + - Map + - See [_](#pipelinesnametriggercron). + + * - `manual` + - Map + - + + +### pipelines.\.trigger.cron + +**`Type: Map`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `quartz_cron_schedule` + - String + - + + * - `timezone_id` + - String + - + + +### pipelines.\.trigger.manual + +**`Type: Map`** + + + + +## quality_monitors + +**`Type: Map`** + +The quality_monitor resource allows you to define a [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). + +```yaml +quality_monitors: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `assets_dir` + - String + - The directory to store monitoring assets (e.g. dashboard, metric tables). + + * - `baseline_table_name` + - String + - Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. + + * - `custom_metrics` + - Sequence + - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). . See [_](#quality_monitorsnamecustom_metrics). + + * - `data_classification_config` + - Map + - The data classification config for the monitor. See [_](#quality_monitorsnamedata_classification_config). + + * - `inference_log` + - Map + - Configuration for monitoring inference logs. See [_](#quality_monitorsnameinference_log). + + * - `notifications` + - Map + - The notification settings for the monitor. See [_](#quality_monitorsnamenotifications). + + * - `output_schema_name` + - String + - Schema where output metric tables are created. + + * - `schedule` + - Map + - The schedule for automatically updating and refreshing metric tables. See [_](#quality_monitorsnameschedule). 
+ + * - `skip_builtin_dashboard` + - Boolean + - Whether to skip creating a default dashboard summarizing data quality metrics. + + * - `slicing_exprs` + - Sequence + - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. + + * - `snapshot` + - Map + - Configuration for monitoring snapshot tables. + + * - `table_name` + - String + - + + * - `time_series` + - Map + - Configuration for monitoring time series tables. See [_](#quality_monitorsnametime_series). + + * - `warehouse_id` + - String + - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. + + +**Example** + +The following example defines a quality monitor: + +```yaml +resources: + quality_monitors: + my_quality_monitor: + table_name: dev.mlops_schema.predictions + output_schema_name: ${bundle.target}.mlops_schema + assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring + inference_log: + granularities: [1 day] + model_id_col: model_id + prediction_col: prediction + label_col: price + problem_type: PROBLEM_TYPE_REGRESSION + timestamp_col: timestamp + schedule: + quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am + timezone_id: UTC +``` + +### quality_monitors.\.custom_metrics + +**`Type: Sequence`** + +Custom metrics to compute on the monitored table. These can be aggregate metrics, derived +metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time +windows). + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `definition` + - String + - Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition). + + * - `input_columns` + - Sequence + - A list of column names in the input table the metric should be computed for. Can use ``":table"`` to indicate that the metric needs information from multiple columns. + + * - `name` + - String + - Name of the metric in the output tables. + + * - `output_data_type` + - String + - The output type of the custom metric. + + * - `type` + - String + - Can only be one of ``"CUSTOM_METRIC_TYPE_AGGREGATE"``, ``"CUSTOM_METRIC_TYPE_DERIVED"``, or ``"CUSTOM_METRIC_TYPE_DRIFT"``. The ``"CUSTOM_METRIC_TYPE_AGGREGATE"`` and ``"CUSTOM_METRIC_TYPE_DERIVED"`` metrics are computed on a single table, whereas the ``"CUSTOM_METRIC_TYPE_DRIFT"`` compare metrics across baseline and input table, or across the two consecutive time windows. - CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics + + +### quality_monitors.\.data_classification_config + +**`Type: Map`** + +The data classification config for the monitor. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `enabled` + - Boolean + - Whether data classification is enabled. + + +### quality_monitors.\.inference_log + +**`Type: Map`** + +Configuration for monitoring inference logs. + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `granularities` + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - `label_col` + - String + - Optional column that contains the ground truth for the prediction. + + * - `model_id_col` + - String + - Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. + + * - `prediction_col` + - String + - Column that contains the output/prediction from the model. + + * - `prediction_proba_col` + - String + - Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). + + * - `problem_type` + - String + - Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. + + * - `timestamp_col` + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + +### quality_monitors.\.notifications + +**`Type: Map`** + +The notification settings for the monitor. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `on_failure` + - Map + - Who to send notifications to on monitor failure. See [_](#quality_monitorsnamenotificationson_failure). + + * - `on_new_classification_tag_detected` + - Map + - Who to send notifications to when new data classification tags are detected. See [_](#quality_monitorsnamenotificationson_new_classification_tag_detected). + + +### quality_monitors.\.notifications.on_failure + +**`Type: Map`** + +Who to send notifications to on monitor failure. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `email_addresses` + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + +### quality_monitors.\.notifications.on_new_classification_tag_detected + +**`Type: Map`** + +Who to send notifications to when new data classification tags are detected. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `email_addresses` + - Sequence + - The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. + + +### quality_monitors.\.schedule + +**`Type: Map`** + +The schedule for automatically updating and refreshing metric tables. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `pause_status` + - String + - Read only field that indicates whether a schedule is paused or not. + + * - `quartz_cron_expression` + - String + - The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). + + * - `timezone_id` + - String + - The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. 
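+The schedule and notification settings documented above can be combined in a single monitor definition. The following is a minimal sketch, not a complete monitor definition; the resource key, table and schema names, assets directory, and email address are illustrative:
+
+```yaml
+resources:
+  quality_monitors:
+    my_quality_monitor:
+      table_name: main.my_schema.my_table
+      output_schema_name: main.my_schema
+      assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring
+      snapshot: {} # assumes snapshot profiling with default settings
+      schedule:
+        quartz_cron_expression: 0 0 12 * * ? # run every day at noon
+        timezone_id: UTC
+      notifications:
+        on_failure:
+          email_addresses:
+            - someone@example.com
+```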
+ + +### quality_monitors.\.snapshot + +**`Type: Map`** + +Configuration for monitoring snapshot tables. + + +### quality_monitors.\.time_series + +**`Type: Map`** + +Configuration for monitoring time series tables. + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `granularities` + - Sequence + - Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. + + * - `timestamp_col` + - String + - Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). + + +## registered_models + +**`Type: Map`** + +The registered model resource allows you to define models in . For information about [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). + +```yaml +registered_models: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog_name` + - String + - The name of the catalog where the schema and the registered model reside + + * - `comment` + - String + - The comment attached to the registered model + + * - `grants` + - Sequence + - See [_](#registered_modelsnamegrants). + + * - `name` + - String + - The name of the registered model + + * - `schema_name` + - String + - The name of the schema where the registered model resides + + * - `storage_location` + - String + - The storage location on the cloud under which model version data files are stored + + +**Example** + +The following example defines a registered model in : + +```yaml +resources: + registered_models: + model: + name: my_model + catalog_name: ${bundle.target} + schema_name: mlops_schema + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE + principal: account users +``` + +### registered_models.\.grants + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `principal` + - String + - The name of the principal that will be granted privileges + + * - `privileges` + - Sequence + - The privileges to grant to the specified entity + + +## schemas + +**`Type: Map`** + +The schema resource type allows you to define [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + +- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. +- Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). + +```yaml +schemas: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog_name` + - String + - Name of parent catalog. 
+ + * - `comment` + - String + - User-provided free-form text description. + + * - `grants` + - Sequence + - See [_](#schemasnamegrants). + + * - `name` + - String + - Name of schema, relative to parent catalog. + + * - `properties` + - Map + - + + * - `storage_root` + - String + - Storage root URL for managed tables within schema. + + +**Example** + +The following example defines a pipeline with the resource key `my_pipeline` that creates a schema with the key `my_schema` as the target: + +```yaml +resources: + pipelines: + my_pipeline: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: main + target: ${resources.schemas.my_schema.id} + + schemas: + my_schema: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created by DABs. +``` + +A top-level grants mapping is not supported by , so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). + +The following example defines a schema with grants: + +```yaml +resources: + schemas: + my_schema: + name: test-schema + grants: + - principal: users + privileges: + - CAN_MANAGE + - principal: my_team + privileges: + - CAN_READ + catalog_name: main + ``` + +### schemas.\.grants + +**`Type: Sequence`** + + + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `principal` + - String + - The name of the principal that will be granted privileges + + * - `privileges` + - Sequence + - The privileges to grant to the specified entity + + +## volumes + +**`Type: Map`** + +The volume resource type allows you to define and create [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + +- A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. + +- Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). + +```yaml +volumes: + : + : +``` + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `catalog_name` + - String + - The name of the catalog where the schema and the volume are + + * - `comment` + - String + - The comment attached to the volume + + * - `grants` + - Sequence + - See [_](#volumesnamegrants). + + * - `name` + - String + - The name of the volume + + * - `schema_name` + - String + - The name of the schema where the volume is + + * - `storage_location` + - String + - The storage location on the cloud + + * - `volume_type` + - String + - + + +**Example** + +The following example creates a volume with the key `my_volume`: + +```yaml +resources: + volumes: + my_volume: + catalog_name: main + name: my_volume + schema_name: my_schema +``` + +For an example bundle that runs a job that writes to a file in volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). + +### volumes.\.grants + +**`Type: Sequence`** + + + + + +.. 
list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `principal` + - String + - The name of the principal that will be granted privileges + + * - `privileges` + - Sequence + - The privileges to grant to the specified entity + \ No newline at end of file diff --git a/bundle/docsgen/refs.go b/bundle/docsgen/refs.go new file mode 100644 index 000000000..7a4451129 --- /dev/null +++ b/bundle/docsgen/refs.go @@ -0,0 +1,105 @@ +package main + +import ( + "log" + "strings" + + "github.com/databricks/cli/libs/jsonschema" +) + +func isReferenceType(v *jsonschema.Schema, refs map[string]*jsonschema.Schema, ownFields map[string]bool) bool { + if v.Type != "object" && v.Type != "array" { + return false + } + if len(v.Properties) > 0 { + return true + } + if v.Items != nil { + items := resolveRefs(v.Items, refs) + if items != nil && items.Type == "object" { + return true + } + } + props := resolveAdditionalProperties(v) + if !isInOwnFields(props, ownFields) { + return false + } + if props != nil { + propsResolved := resolveRefs(props, refs) + return propsResolved.Type == "object" + } + + return false +} + +func isInOwnFields(node *jsonschema.Schema, ownFields map[string]bool) bool { + if node != nil && node.Reference != nil { + return ownFields[getRefType(node)] + } + return true +} + +func resolveAdditionalProperties(v *jsonschema.Schema) *jsonschema.Schema { + if v.AdditionalProperties == nil { + return nil + } + additionalProps, ok := v.AdditionalProperties.(*jsonschema.Schema) + if !ok { + return nil + } + return additionalProps +} + +func resolveRefs(s *jsonschema.Schema, schemas map[string]*jsonschema.Schema) *jsonschema.Schema { + if s == nil { + return nil + } + + node := s + description := s.Description + markdownDescription := s.MarkdownDescription + examples := getExamples(s.Examples) + + for node.Reference != nil { + ref := getRefType(node) + newNode, ok := schemas[ref] + if !ok { + log.Printf("schema %s not found", ref) + break + } + + if description == "" { + description = newNode.Description + } + if markdownDescription == "" { + markdownDescription = newNode.MarkdownDescription + } + if len(examples) == 0 { + examples = getExamples(newNode.Examples) + } + + node = newNode + } + + newNode := *node + newNode.Description = description + newNode.MarkdownDescription = markdownDescription + newNode.Examples = examples + + return &newNode +} + +func getExamples(examples any) []string { + typedExamples, ok := examples.([]string) + if !ok { + return []string{} + } + return typedExamples +} + +func getRefType(node *jsonschema.Schema) string { + if node.Reference == nil { + return "" + } + return strings.TrimPrefix(*node.Reference, "#/$defs/") +} diff --git a/bundle/docsgen/renderer.go b/bundle/docsgen/renderer.go new file mode 100644 index 000000000..5f6c77258 --- /dev/null +++ b/bundle/docsgen/renderer.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "runtime" + "strings" +) + +type markdownRenderer struct { + nodes []string +} + +func newMardownRenderer() *markdownRenderer { + return &markdownRenderer{} +} + +func (m *markdownRenderer) add(s string) *markdownRenderer { + m.nodes = append(m.nodes, s) + return m +} + +func (m *markdownRenderer) PlainText(s string) *markdownRenderer { + return m.add(s) +} + +func (m *markdownRenderer) LF() *markdownRenderer { + return m.add(" ") +} + +func (m *markdownRenderer) H2(s string) *markdownRenderer { + return m.add("## " + s) +} + +func (m *markdownRenderer) H3(s string) *markdownRenderer { + return m.add("### " + s) +} + 
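+// CodeBlocks appends a fenced code block for the given language and contents, using the platform-specific line ending.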
+func (m *markdownRenderer) CodeBlocks(lang, s string) *markdownRenderer { + return m.add(fmt.Sprintf("```%s%s%s%s```", lang, lineFeed(), s, lineFeed())) +} + +func (m *markdownRenderer) String() string { + return strings.Join(m.nodes, lineFeed()) +} + +func lineFeed() string { + if runtime.GOOS == "windows" { + return "\r\n" + } + return "\n" +} diff --git a/bundle/docsgen/templates/reference.md b/bundle/docsgen/templates/reference.md new file mode 100644 index 000000000..38072f70a --- /dev/null +++ b/bundle/docsgen/templates/reference.md @@ -0,0 +1,13 @@ +--- +description: 'Configuration reference for databricks.yml' +last_update: + date: 2025-02-14 +--- + + + +# Configuration reference + +This article provides reference for keys supported by :re[DABS] configuration (YAML). See [\_](/dev-tools/bundles/index.md). + +For complete bundle examples, see [\_](/dev-tools/bundles/resource-examples.md) and the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples). diff --git a/bundle/docsgen/templates/resources.md b/bundle/docsgen/templates/resources.md new file mode 100644 index 000000000..c6ba3cd88 --- /dev/null +++ b/bundle/docsgen/templates/resources.md @@ -0,0 +1,124 @@ +--- +description: 'Learn about resources supported by Databricks Asset Bundles and how to configure them.' +last_update: + date: 2025-02-14 +--- + + + +# :re[DABS] resources + +:re[DABS] allows you to specify information about the :re[Databricks] resources used by the bundle in the `resources` mapping in the bundle configuration. See [resources mapping](/dev-tools/bundles/settings.md#resources) and [resources key reference](/dev-tools/bundles/reference.md#resources). + +This article outlines supported resource types for bundles and provides details and an example for each supported type. For additional examples, see [\_](/dev-tools/bundles/resource-examples.md). + +:::tip + +To generate YAML for any existing resource, use the `databricks bundle generate` command. See [\_](/dev-tools/cli/bundle-commands.md#generate). + +::: + +## Supported resources + +The following table lists supported resource types for bundles. Some resources can be created by defining them in a bundle and deploying the bundle, and some resources only support referencing an existing resource to include in the bundle. + +Resources are defined using the corresponding [Databricks REST API](https://docs.databricks.com/api/workspace/introduction) object’s create operation request payload, where the object’s supported fields, expressed as YAML, are the resource’s supported properties. Links to documentation for each resource’s corresponding payloads are listed in the table. + +:::tip + +The `databricks bundle validate` command returns warnings if unknown resource properties are found in bundle configuration files. 
+ +::: + +::::aws-azure + +:::list-table + +- - Resource + - Create support + - Corresponding REST API object +- - [app](#apps) + - ✓ + - [App object](https://docs.databricks.com/api/workspace/apps/create) +- - [cluster](#clusters) + - ✓ + - [Cluster object](https://docs.databricks.com/api/workspace/clusters/create) +- - [dashboard](#dashboards) + - + - [Dashboard object](https://docs.databricks.com/api/workspace/lakeview/create) +- - [experiment](#experiments) + - ✓ + - [Experiment object](https://docs.databricks.com/api/workspace/experiments/createexperiment) +- - [job](#job) + - ✓ + - [Job object](https://docs.databricks.com/api/workspace/jobs/create) +- - [model (legacy)](#models) + - ✓ + - [Model (legacy) object](https://docs.databricks.com/api/workspace/modelregistry/createmodel) +- - [model_serving_endpoint](#model_serving_endpoints) + - ✓ + - [Model serving endpoint object](https://docs.databricks.com/api/workspace/servingendpoints/create) +- - [pipeline](#pipeline) + - ✓ + - [Pipeline object](https://docs.databricks.com/api/workspace/pipelines/create) +- - [quality_monitor](#quality_monitors) + - ✓ + - [Quality monitor object](https://docs.databricks.com/api/workspace/qualitymonitors/create) +- - [registered_model](#registered_models) (:re[UC]) + - ✓ + - [Registered model object](https://docs.databricks.com/api/workspace/registeredmodels/create) +- - [schema](#schemas) (:re[UC]) + - ✓ + - [Schema object](https://docs.databricks.com/api/workspace/schemas/create) +- - [volume](#volumes) (:re[UC]) + - ✓ + - [Volume object](https://docs.databricks.com/api/workspace/volumes/create) + +::: + +:::: + +::::gcp + +:::list-table + +- - Resource + - Create support + - Corresponding REST API object +- - [cluster](#clusters) + - ✓ + - [Cluster object](https://docs.databricks.com/api/workspace/clusters/create) +- - [dashboard](#dashboards) + - + - [Dashboard object](https://docs.databricks.com/api/workspace/lakeview/create) +- - [experiment](#experiments) + - ✓ + - [Experiment object](https://docs.databricks.com/api/workspace/experiments/createexperiment) +- - [job](#jobs) + - ✓ + - [Job object](https://docs.databricks.com/api/workspace/jobs/create) +- - [model (legacy)](#models) + - ✓ + - [Model (legacy) object](https://docs.databricks.com/api/workspace/modelregistry/createmodel) +- - [model_serving_endpoint](#model_serving_endpoints) + - ✓ + - [Model serving endpoint object](https://docs.databricks.com/api/workspace/servingendpoints/create) +- - [pipeline](#pipelines) + - ✓ + - [Pipeline object]](https://docs.databricks.com/api/workspace/pipelines/create) +- - [quality_monitor](#quality_monitors) + - ✓ + - [Quality monitor object](https://docs.databricks.com/api/workspace/qualitymonitors/create) +- - [registered_model](#registered_models) (:re[UC]) + - ✓ + - [Registered model object](https://docs.databricks.com/api/workspace/registeredmodels/create) +- - [schema](#schemas) (:re[UC]) + - ✓ + - [Schema object](https://docs.databricks.com/api/workspace/schemas/create) +- - [volume](#volumes) (:re[UC]) + - ✓ + - [Volume object](https://docs.databricks.com/api/workspace/volumes/create) + +::: + +:::: diff --git a/bundle/docsgen/testdata/anchors.md b/bundle/docsgen/testdata/anchors.md new file mode 100644 index 000000000..0145d8cc9 --- /dev/null +++ b/bundle/docsgen/testdata/anchors.md @@ -0,0 +1,28 @@ +Header + +## some_field + +**`Type: Map`** + +This is a description + + + +.. list-table:: + :header-rows: 1 + + * - Key + - Type + - Description + + * - `my_attribute` + - Map + - Desc with link. 
See [_](#some_fieldnamemy_attribute). + + +### some_field.\.my_attribute + +**`Type: Boolean`** + +Another description + \ No newline at end of file diff --git a/bundle/if.go b/bundle/if.go deleted file mode 100644 index bad1d72d2..000000000 --- a/bundle/if.go +++ /dev/null @@ -1,40 +0,0 @@ -package bundle - -import ( - "context" - - "github.com/databricks/cli/libs/diag" -) - -type ifMutator struct { - condition func(context.Context, *Bundle) (bool, error) - onTrueMutator Mutator - onFalseMutator Mutator -} - -func If( - condition func(context.Context, *Bundle) (bool, error), - onTrueMutator Mutator, - onFalseMutator Mutator, -) Mutator { - return &ifMutator{ - condition, onTrueMutator, onFalseMutator, - } -} - -func (m *ifMutator) Apply(ctx context.Context, b *Bundle) diag.Diagnostics { - v, err := m.condition(ctx, b) - if err != nil { - return diag.FromErr(err) - } - - if v { - return Apply(ctx, b, m.onTrueMutator) - } else { - return Apply(ctx, b, m.onFalseMutator) - } -} - -func (m *ifMutator) Name() string { - return "If" -} diff --git a/bundle/if_test.go b/bundle/if_test.go deleted file mode 100644 index b3fc0b9d9..000000000 --- a/bundle/if_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package bundle - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestIfMutatorTrue(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - ifMutator := If(func(context.Context, *Bundle) (bool, error) { - return true, nil - }, m1, m2) - - b := &Bundle{} - diags := Apply(context.Background(), b, ifMutator) - assert.NoError(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 0, m2.applyCalled) -} - -func TestIfMutatorFalse(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - ifMutator := If(func(context.Context, *Bundle) (bool, error) { - return false, nil - }, m1, m2) - - b := &Bundle{} - diags := Apply(context.Background(), b, ifMutator) - assert.NoError(t, diags.Error()) - - assert.Equal(t, 0, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) -} - -func TestIfMutatorError(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - ifMutator := If(func(context.Context, *Bundle) (bool, error) { - return true, assert.AnError - }, m1, m2) - - b := &Bundle{} - diags := Apply(context.Background(), b, ifMutator) - assert.Error(t, diags.Error()) - - assert.Equal(t, 0, m1.applyCalled) - assert.Equal(t, 0, m2.applyCalled) -} diff --git a/bundle/internal/annotation/descriptor.go b/bundle/internal/annotation/descriptor.go new file mode 100644 index 000000000..26c1a0b06 --- /dev/null +++ b/bundle/internal/annotation/descriptor.go @@ -0,0 +1,12 @@ +package annotation + +type Descriptor struct { + Description string `json:"description,omitempty"` + MarkdownDescription string `json:"markdown_description,omitempty"` + Title string `json:"title,omitempty"` + Default any `json:"default,omitempty"` + Enum []any `json:"enum,omitempty"` + MarkdownExamples string `json:"markdown_examples,omitempty"` +} + +const Placeholder = "PLACEHOLDER" diff --git a/bundle/internal/annotation/file.go b/bundle/internal/annotation/file.go new file mode 100644 index 000000000..0317f441a --- /dev/null +++ b/bundle/internal/annotation/file.go @@ -0,0 +1,44 @@ +package annotation + +import ( + "bytes" + "os" + + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" + "github.com/databricks/cli/libs/dyn/yamlloader" +) + +// Parsed file with annotations, expected format: +// 
github.com/databricks/cli/bundle/config.Bundle: +// +// cluster_id: +// description: "Description" +type File map[string]map[string]Descriptor + +func LoadAndMerge(sources []string) (File, error) { + prev := dyn.NilValue + for _, path := range sources { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) + if err != nil { + return nil, err + } + prev, err = merge.Merge(prev, generated) + if err != nil { + return nil, err + } + } + + var data File + + err := convert.ToTyped(&data, prev) + if err != nil { + return nil, err + } + return data, nil +} diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 91aaa4555..ee3c25ca1 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -11,6 +11,7 @@ import ( yaml3 "gopkg.in/yaml.v3" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/dyn/merge" @@ -19,60 +20,23 @@ import ( "github.com/databricks/cli/libs/jsonschema" ) -type annotation struct { - Description string `json:"description,omitempty"` - MarkdownDescription string `json:"markdown_description,omitempty"` - Title string `json:"title,omitempty"` - Default any `json:"default,omitempty"` - Enum []any `json:"enum,omitempty"` -} - type annotationHandler struct { // Annotations read from all annotation files including all overrides - parsedAnnotations annotationFile + parsedAnnotations annotation.File // Missing annotations for fields that are found in config that need to be added to the annotation file - missingAnnotations annotationFile + missingAnnotations annotation.File } -/** - * Parsed file with annotations, expected format: - * github.com/databricks/cli/bundle/config.Bundle: - * cluster_id: - * description: "Description" - */ -type annotationFile map[string]map[string]annotation - -const Placeholder = "PLACEHOLDER" - // Adds annotations to the JSON schema reading from the annotation files. 
// More details https://json-schema.org/understanding-json-schema/reference/annotations func newAnnotationHandler(sources []string) (*annotationHandler, error) { - prev := dyn.NilValue - for _, path := range sources { - b, err := os.ReadFile(path) - if err != nil { - return nil, err - } - generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b)) - if err != nil { - return nil, err - } - prev, err = merge.Merge(prev, generated) - if err != nil { - return nil, err - } - } - - var data annotationFile - - err := convert.ToTyped(&data, prev) + data, err := annotation.LoadAndMerge(sources) if err != nil { return nil, err } - d := &annotationHandler{} d.parsedAnnotations = data - d.missingAnnotations = annotationFile{} + d.missingAnnotations = annotation.File{} return d, nil } @@ -85,7 +49,7 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema annotations := d.parsedAnnotations[refPath] if annotations == nil { - annotations = map[string]annotation{} + annotations = map[string]annotation.Descriptor{} } rootTypeAnnotation, ok := annotations[RootTypeKey] @@ -96,11 +60,11 @@ func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema for k, v := range s.Properties { item := annotations[k] if item.Description == "" { - item.Description = Placeholder + item.Description = annotation.Placeholder emptyAnnotations := d.missingAnnotations[refPath] if emptyAnnotations == nil { - emptyAnnotations = map[string]annotation{} + emptyAnnotations = map[string]annotation.Descriptor{} d.missingAnnotations[refPath] = emptyAnnotations } emptyAnnotations[k] = item @@ -124,7 +88,7 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error for k := range d.missingAnnotations { if !isCliPath(k) { delete(d.missingAnnotations, k) - fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to fetch latest OpenAPI spec and regenerate annotations", k) + fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to fetch latest OpenAPI spec and regenerate annotations\n", k) } } @@ -138,7 +102,7 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error return err } - var outputTyped annotationFile + var outputTyped annotation.File err = convert.ToTyped(&outputTyped, output) if err != nil { return err @@ -155,8 +119,8 @@ func getPath(typ reflect.Type) string { return typ.PkgPath() + "." 
+ typ.Name() } -func assignAnnotation(s *jsonschema.Schema, a annotation) { - if a.Description != Placeholder { +func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { + if a.Description != annotation.Placeholder { s.Description = a.Description } @@ -168,7 +132,7 @@ func assignAnnotation(s *jsonschema.Schema, a annotation) { s.Enum = a.Enum } -func saveYamlWithStyle(outputPath string, annotations annotationFile) error { +func saveYamlWithStyle(outputPath string, annotations annotation.File) error { annotationOrder := yamlsaver.NewOrder([]string{"description", "markdown_description", "title", "default", "enum"}) style := map[string]yaml3.Style{} @@ -220,15 +184,17 @@ func convertLinksToAbsoluteUrl(s string) string { referencePage := "/dev-tools/bundles/reference.html" // Regular expression to match Markdown-style links like [_](link) - re := regexp.MustCompile(`\[_\]\(([^)]+)\)`) + re := regexp.MustCompile(`\[(.*?)\]\((.*?)\)`) result := re.ReplaceAllStringFunc(s, func(match string) string { matches := re.FindStringSubmatch(match) if len(matches) < 2 { return match } - link := matches[1] - var text, absoluteURL string + originalText := matches[1] + link := matches[2] + + var text, absoluteURL string if strings.HasPrefix(link, "#") { text = strings.TrimPrefix(link, "#") absoluteURL = fmt.Sprintf("%s%s%s", base, referencePage, link) @@ -246,6 +212,10 @@ func convertLinksToAbsoluteUrl(s string) string { return match } + if originalText != "_" { + text = originalText + } + return fmt.Sprintf("[%s](%s)", text, absoluteURL) }) diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 28d29798a..e658f6e53 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -1,31 +1,25 @@ github.com/databricks/cli/bundle/config.Artifact: "build": "description": |- - An optional set of non-default build commands that you want to run locally before deployment. - - For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment. - - To specify multiple build commands, separate each command with double-ampersand (&&) characters. + An optional set of build commands to run locally before deployment. "executable": "description": |- - The executable type. + The executable type. Valid values are `bash`, `sh`, and `cmd`. "files": "description": |- - The source files for the artifact. - "markdown_description": |- - The source files for the artifact, defined as an [_](#artifact_file). + The relative or absolute path to the built artifact files. "path": "description": |- - The location where the built artifact will be saved. + The local path of the directory for the artifact. "type": "description": |- - The type of the artifact. + Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. "markdown_description": |- - The type of the artifact. Valid values are `wheel` or `jar` + Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`. github.com/databricks/cli/bundle/config.ArtifactFile: "source": "description": |- - The path of the files used to build the artifact. + Required. The artifact source file. 
github.com/databricks/cli/bundle/config.Bundle: "cluster_id": "description": |- @@ -34,7 +28,7 @@ github.com/databricks/cli/bundle/config.Bundle: The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id). "compute_id": "description": |- - PLACEHOLDER + Deprecated. The ID of the compute to use to run the bundle. "databricks_cli_version": "description": |- The Databricks CLI version to use for the bundle. @@ -44,18 +38,18 @@ github.com/databricks/cli/bundle/config.Bundle: "description": |- The definition of the bundle deployment "markdown_description": |- - The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md). + The definition of the bundle deployment. For supported attributes see [_](/dev-tools/bundles/deployment-modes.md). "git": "description": |- The Git version control details that are associated with your bundle. "markdown_description": |- - The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git). + The Git version control details that are associated with your bundle. For supported attributes see [_](/dev-tools/bundles/settings.md#git). "name": "description": |- The name of the bundle. "uuid": "description": |- - PLACEHOLDER + Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). github.com/databricks/cli/bundle/config.Deployment: "fail_on_active_runs": "description": |- @@ -63,8 +57,6 @@ github.com/databricks/cli/bundle/config.Deployment: "lock": "description": |- The deployment lock attributes. - "markdown_description": |- - The deployment lock attributes. See [_](#lock). github.com/databricks/cli/bundle/config.Experimental: "pydabs": "description": |- @@ -74,13 +66,13 @@ github.com/databricks/cli/bundle/config.Experimental: Configures loading of Python code defined with 'databricks-bundles' package. "python_wheel_wrapper": "description": |- - Whether to use a Python wheel wrapper + Whether to use a Python wheel wrapper. "scripts": "description": |- - The commands to run + The commands to run. "use_legacy_run_as": "description": |- - Whether to use the legacy run_as behavior + Whether to use the legacy run_as behavior. github.com/databricks/cli/bundle/config.Git: "branch": "description": |- @@ -149,69 +141,85 @@ github.com/databricks/cli/bundle/config.Python: github.com/databricks/cli/bundle/config.Resources: "apps": "description": |- - PLACEHOLDER + The app resource defines a Databricks app. + "markdown_description": |- + The app resource defines a [Databricks app](/api/workspace/apps/create). For information about Databricks Apps, see [_](/dev-tools/databricks-apps/index.md). "clusters": "description": |- - The cluster definitions for the bundle. + The cluster definitions for the bundle, where each key is the name of a cluster. "markdown_description": |- - The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster) + The cluster definitions for the bundle, where each key is the name of a cluster. See [_](/dev-tools/bundles/resources.md#clusters). "dashboards": "description": |- - The dashboard definitions for the bundle. + The dashboard definitions for the bundle, where each key is the name of the dashboard. 
"markdown_description": |- - The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard) + The dashboard definitions for the bundle, where each key is the name of the dashboard. See [_](/dev-tools/bundles/resources.md#dashboards). "experiments": "description": |- - The experiment definitions for the bundle. + The experiment definitions for the bundle, where each key is the name of the experiment. "markdown_description": |- - The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment) + The experiment definitions for the bundle, where each key is the name of the experiment. See [_](/dev-tools/bundles/resources.md#experiments). "jobs": "description": |- - The job definitions for the bundle. + The job definitions for the bundle, where each key is the name of the job. "markdown_description": |- - The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job) + The job definitions for the bundle, where each key is the name of the job. See [_](/dev-tools/bundles/resources.md#jobs). "model_serving_endpoints": "description": |- - The model serving endpoint definitions for the bundle. + The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. "markdown_description": |- - The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint) + The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [_](/dev-tools/bundles/resources.md#model_serving_endpoints). "models": "description": |- - The model definitions for the bundle. + The model definitions for the bundle, where each key is the name of the model. "markdown_description": |- - The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model) + The model definitions for the bundle, where each key is the name of the model. See [_](/dev-tools/bundles/resources.md#models). "pipelines": "description": |- - The pipeline definitions for the bundle. + The pipeline definitions for the bundle, where each key is the name of the pipeline. "markdown_description": |- - The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline) + The pipeline definitions for the bundle, where each key is the name of the pipeline. See [_](/dev-tools/bundles/resources.md#pipelines). "quality_monitors": "description": |- - The quality monitor definitions for the bundle. + The quality monitor definitions for the bundle, where each key is the name of the quality monitor. "markdown_description": |- - The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor) + The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [_](/dev-tools/bundles/resources.md#quality_monitors). "registered_models": "description": |- - The registered model definitions for the bundle. + The registered model definitions for the bundle, where each key is the name of the registered model. "markdown_description": |- - The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model) + The registered model definitions for the bundle, where each key is the name of the registered model. See [_](/dev-tools/bundles/resources.md#registered_models). "schemas": "description": |- - The schema definitions for the bundle. 
+ The schema definitions for the bundle, where each key is the name of the schema. "markdown_description": |- - The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema) + The schema definitions for the bundle, where each key is the name of the schema. See [_](/dev-tools/bundles/resources.md#schemas). "volumes": "description": |- - PLACEHOLDER + The volume definitions for the bundle, where each key is the name of the volume. + "markdown_description": |- + The volume definitions for the bundle, where each key is the name of the volume. See [_](/dev-tools/bundles/resources.md#volumes). github.com/databricks/cli/bundle/config.Root: "artifacts": "description": |- Defines the attributes to build an artifact + "markdown_description": |- + Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [_](/dev-tools/bundles/settings.md#artifacts). + + Artifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [_](/dev-tools/bundles/artifact-overrides.md). + "markdown_examples": |- + ```yaml + artifacts: + default: + type: whl + build: poetry build + path: . + ``` "bundle": "description": |- - The attributes of the bundle. + The bundle attributes when deploying to this target. "markdown_description": |- - The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle) + The bundle attributes when deploying to this target, "experimental": "description": |- Defines attributes for experimental features. @@ -219,12 +227,24 @@ github.com/databricks/cli/bundle/config.Root: "description": |- Specifies a list of path globs that contain configuration files to include within the bundle. "markdown_description": |- - Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include) + Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include). "permissions": "description": |- - Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle + Defines a permission for a specific entity. "markdown_description": |- - Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity. + + See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). + "markdown_examples": |- + ```yaml + permissions: + - level: CAN_VIEW + group_name: test-group + - level: CAN_MANAGE + user_name: someone@example.com + - level: CAN_RUN + service_principal_name: 123456-abcdef + ``` "presets": "description": |- Defines bundle deployment presets. @@ -232,26 +252,39 @@ github.com/databricks/cli/bundle/config.Root: Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets). "resources": "description": |- - Specifies information about the Databricks resources used by the bundle + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. 
"markdown_description": |- - Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md). + A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about supported resources, and resource definition reference, see [_](/dev-tools/bundles/resources.md). + + ```yaml + resources: + : + : + : + ``` "run_as": "description": |- - The identity to use to run the bundle. + The identity to use when running workflows. + "markdown_description": |- + The identity to use when running workflows. See [_](/dev-tools/bundles/run-as.md). "sync": "description": |- The files and file paths to include or exclude in the bundle. "markdown_description": |- - The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/) + The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/settings.md#sync). "targets": "description": |- Defines deployment targets for the bundle. + "markdown_description": |- + Defines deployment targets for the bundle. See [_](/dev-tools/bundles/settings.md#targets) "variables": "description": |- A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable. "workspace": "description": |- Defines the Databricks workspace for the bundle. + "markdown_description": |- + Defines the Databricks workspace for the bundle. See [_](/dev-tools/bundles/settings.md#workspace). github.com/databricks/cli/bundle/config.Sync: "exclude": "description": |- @@ -266,11 +299,9 @@ github.com/databricks/cli/bundle/config.Target: "artifacts": "description": |- The artifacts to include in the target deployment. - "markdown_description": |- - The artifacts to include in the target deployment. See [_](#artifact) "bundle": "description": |- - The name of the bundle when deploying to this target. + The bundle attributes when deploying to this target. "cluster_id": "description": |- The ID of the cluster to use for this target. @@ -283,8 +314,6 @@ github.com/databricks/cli/bundle/config.Target: "git": "description": |- The Git version control settings for the target. - "markdown_description": |- - The Git version control settings for the target. See [_](#git). "mode": "description": |- The deployment mode for the target. @@ -293,38 +322,26 @@ github.com/databricks/cli/bundle/config.Target: "permissions": "description": |- The permissions for deploying and running the bundle in the target. - "markdown_description": |- - The permissions for deploying and running the bundle in the target. See [_](#permission). "presets": "description": |- The deployment presets for the target. - "markdown_description": |- - The deployment presets for the target. See [_](#preset). "resources": "description": |- The resource definitions for the target. - "markdown_description": |- - The resource definitions for the target. See [_](#resources). "run_as": "description": |- The identity to use to run the bundle. "markdown_description": |- - The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md). + The identity to use to run the bundle, see [_](/dev-tools/bundles/run-as.md). "sync": "description": |- The local paths to sync to the target workspace when a bundle is run or deployed. - "markdown_description": |- - The local paths to sync to the target workspace when a bundle is run or deployed. 
See [_](#sync). "variables": "description": |- The custom variable definitions for the target. - "markdown_description": |- - The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md). "workspace": "description": |- The Databricks workspace for the target. - "markdown_description": |- - The Databricks workspace for the target. [_](#workspace) github.com/databricks/cli/bundle/config.Workspace: "artifact_path": "description": |- @@ -374,64 +391,6 @@ github.com/databricks/cli/bundle/config.Workspace: "state_path": "description": |- The workspace state path -github.com/databricks/cli/bundle/config/resources.App: - "active_deployment": - "description": |- - PLACEHOLDER - "app_status": - "description": |- - PLACEHOLDER - "compute_status": - "description": |- - PLACEHOLDER - "config": - "description": |- - PLACEHOLDER - "create_time": - "description": |- - PLACEHOLDER - "creator": - "description": |- - PLACEHOLDER - "default_source_code_path": - "description": |- - PLACEHOLDER - "description": - "description": |- - PLACEHOLDER - "name": - "description": |- - PLACEHOLDER - "pending_deployment": - "description": |- - PLACEHOLDER - "permissions": - "description": |- - PLACEHOLDER - "resources": - "description": |- - PLACEHOLDER - "service_principal_client_id": - "description": |- - PLACEHOLDER - "service_principal_id": - "description": |- - PLACEHOLDER - "service_principal_name": - "description": |- - PLACEHOLDER - "source_code_path": - "description": |- - PLACEHOLDER - "update_time": - "description": |- - PLACEHOLDER - "updater": - "description": |- - PLACEHOLDER - "url": - "description": |- - PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Grant: "principal": "description": |- @@ -440,6 +399,11 @@ github.com/databricks/cli/bundle/config/resources.Grant: "description": |- The privileges to grant to the specified entity github.com/databricks/cli/bundle/config/resources.Permission: + "-": + "description": |- + Defines a permission for a specific entity. + "markdown_description": |- + Defines a permission for a specific entity. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md). "group_name": "description": |- The name of the group that has the permission set in level. @@ -455,44 +419,44 @@ github.com/databricks/cli/bundle/config/resources.Permission: github.com/databricks/cli/bundle/config/variable.Lookup: "alert": "description": |- - PLACEHOLDER + The name of the alert for which to retrieve an ID. "cluster": "description": |- - PLACEHOLDER + The name of the cluster for which to retrieve an ID. "cluster_policy": "description": |- - PLACEHOLDER + The name of the cluster_policy for which to retrieve an ID. "dashboard": "description": |- - PLACEHOLDER + The name of the dashboard for which to retrieve an ID. "instance_pool": "description": |- - PLACEHOLDER + The name of the instance_pool for which to retrieve an ID. "job": "description": |- - PLACEHOLDER + The name of the job for which to retrieve an ID. "metastore": "description": |- - PLACEHOLDER + The name of the metastore for which to retrieve an ID. "notification_destination": "description": |- - PLACEHOLDER + The name of the notification_destination for which to retrieve an ID. "pipeline": "description": |- - PLACEHOLDER + The name of the pipeline for which to retrieve an ID. "query": "description": |- - PLACEHOLDER + The name of the query for which to retrieve an ID. 
"service_principal": "description": |- - PLACEHOLDER + The name of the service_principal for which to retrieve an ID. "warehouse": "description": |- - PLACEHOLDER + The name of the warehouse for which to retrieve an ID. github.com/databricks/cli/bundle/config/variable.TargetVariable: "default": "description": |- - PLACEHOLDER + The default value for the variable. "description": "description": |- The description of the variable. @@ -506,9 +470,14 @@ github.com/databricks/cli/bundle/config/variable.TargetVariable: "description": |- The type of the variable. github.com/databricks/cli/bundle/config/variable.Variable: + "_": + "description": |- + Defines a custom variable for the bundle. + "markdown_description": |- + Defines a custom variable for the bundle. See [_](/dev-tools/bundles/settings.md#variables). "default": "description": |- - PLACEHOLDER + The default value for the variable. "description": "description": |- The description of the variable @@ -516,107 +485,14 @@ github.com/databricks/cli/bundle/config/variable.Variable: "description": |- The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. "markdown_description": |- - The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." + The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID. "type": "description": |- The type of the variable. -github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: - "create_time": +github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs: + "service_principal_name": "description": |- - PLACEHOLDER - "creator": + The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. 
+ "user_name": "description": |- - PLACEHOLDER - "deployment_artifacts": - "description": |- - PLACEHOLDER - "deployment_id": - "description": |- - PLACEHOLDER - "mode": - "description": |- - PLACEHOLDER - "source_code_path": - "description": |- - PLACEHOLDER - "status": - "description": |- - PLACEHOLDER - "update_time": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: - "source_code_path": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: - "message": - "description": |- - PLACEHOLDER - "state": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResource: - "description": - "description": |- - PLACEHOLDER - "job": - "description": |- - PLACEHOLDER - "name": - "description": |- - PLACEHOLDER - "secret": - "description": |- - PLACEHOLDER - "serving_endpoint": - "description": |- - PLACEHOLDER - "sql_warehouse": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: - "id": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: - "key": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER - "scope": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: - "name": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: - "id": - "description": |- - PLACEHOLDER - "permission": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: - "message": - "description": |- - PLACEHOLDER - "state": - "description": |- - PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: - "message": - "description": |- - PLACEHOLDER - "state": - "description": |- - PLACEHOLDER + The email of an active workspace user. Non-admin users can only set this field to their own email. diff --git a/bundle/internal/schema/annotations_openapi.yml b/bundle/internal/schema/annotations_openapi.yml index e9c893c87..669ecb9ed 100644 --- a/bundle/internal/schema/annotations_openapi.yml +++ b/bundle/internal/schema/annotations_openapi.yml @@ -1,4 +1,50 @@ # This file is auto-generated. DO NOT EDIT. +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + The active deployment of the app. A deployment is considered active when it has been deployed + to the app compute. + "app_status": {} + "compute_status": {} + "create_time": + "description": |- + The creation time of the app. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user that created the app. + "default_source_code_path": + "description": |- + The default workspace file system path of the source code from which app deployment are + created. This field tracks the workspace source code path of the last active deployment. + "description": + "description": |- + The description of the app. + "id": + "description": |- + The unique identifier of the app. + "name": + "description": |- + The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. + It must be unique within the workspace. + "pending_deployment": + "description": |- + The pending deployment of the app. 
A deployment is considered pending when it is being prepared + for deployment to the app compute. + "resources": + "description": |- + Resources for the app. + "service_principal_client_id": {} + "service_principal_id": {} + "service_principal_name": {} + "update_time": + "description": |- + The update time of the app. Formatted timestamp in ISO 6801. + "updater": + "description": |- + The email of the user that last updated the app. + "url": + "description": |- + The URL of the app once it is deployed. github.com/databricks/cli/bundle/config/resources.Cluster: "apply_policy_default_values": "description": |- @@ -24,7 +70,7 @@ github.com/databricks/cli/bundle/config/resources.Cluster: "cluster_log_conf": "description": |- The configuration for delivering spark logs to a long-term storage destination. - Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. @@ -220,6 +266,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "job_clusters": "description": |- A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. "max_concurrent_runs": "description": |- An optional maximum allowed number of concurrent runs of the job. @@ -237,6 +284,9 @@ github.com/databricks/cli/bundle/config/resources.Job: "parameters": "description": |- Job-level parameter definitions + "performance_target": + "description": |- + PerformanceTarget defines how performant or cost efficient the execution of run on serverless should be. "queue": "description": |- The queue settings of the job. @@ -250,6 +300,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "tasks": "description": |- A list of task specifications to be executed by this job. + If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. "timeout_seconds": "description": |- An optional timeout applied to each run of this job. A value of `0` means no timeout. @@ -308,12 +359,12 @@ github.com/databricks/cli/bundle/config/resources.MlflowModel: github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: "ai_gateway": "description": |- - The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned throughput endpoints are currently supported. "config": "description": |- The core config of the serving endpoint. "name": - "description": | + "description": |- The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. 
"rate_limits": @@ -326,6 +377,9 @@ github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: "description": |- Tags to be attached to the serving endpoint and automatically propagated to billing logs. github.com/databricks/cli/bundle/config/resources.Pipeline: + "allow_duplicate_names": + "description": |- + If false, deployment will fail if name conflicts with that of another pipeline. "budget_policy_id": "description": |- Budget policy of this pipeline. @@ -350,6 +404,7 @@ github.com/databricks/cli/bundle/config/resources.Pipeline: "development": "description": |- Whether the pipeline is in Development mode. Defaults to false. + "dry_run": {} "edition": "description": |- Pipeline product edition. @@ -380,6 +435,7 @@ github.com/databricks/cli/bundle/config/resources.Pipeline: "restart_window": "description": |- Restart window of this pipeline. + "run_as": {} "schema": "description": |- The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. @@ -489,6 +545,187 @@ github.com/databricks/cli/bundle/config/resources.Volume: "description": |- The storage location on the cloud "volume_type": {} +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + The creation time of the deployment. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user creates the deployment. + "deployment_artifacts": + "description": |- + The deployment artifacts for an app. + "deployment_id": + "description": |- + The unique id of the deployment. + "mode": + "description": |- + The mode of which the deployment will manage the source code. + "source_code_path": + "description": |- + The workspace file system path of the source code used to create the app deployment. This is different from + `deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers + to the original source code location of the app in the workspace during deployment creation, whereas + the latter provides a system generated stable snapshotted source code path used by the deployment. + "status": + "description": |- + Status and status message of the deployment + "update_time": + "description": |- + The update time of the deployment. Formatted timestamp in ISO 6801. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + The snapshotted workspace file system path of the source code loaded by the deployed app. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode: + "_": + "enum": + - |- + SNAPSHOT + - |- + AUTO_SYNC +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState: + "_": + "enum": + - |- + SUCCEEDED + - |- + FAILED + - |- + IN_PROGRESS + - |- + CANCELLED +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + Message corresponding with the deployment state. + "state": + "description": |- + State of the deployment. +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": + "description": |- + Description of the App Resource. + "job": {} + "name": + "description": |- + Name of the App Resource. + "secret": {} + "serving_endpoint": {} + "sql_warehouse": {} +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + Id of the job to grant permission on. 
+ "permission": + "description": |- + Permissions to grant on the Job. Supported permissions are: "CAN_MANAGE", "IS_OWNER", "CAN_MANAGE_RUN", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + IS_OWNER + - |- + CAN_MANAGE_RUN + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + Key of the secret to grant permission on. + "permission": + "description": |- + Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: "READ", "WRITE", "MANAGE". + "scope": + "description": |- + Scope of the secret to grant permission on. +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission: + "_": + "description": |- + Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". + "enum": + - |- + READ + - |- + WRITE + - |- + MANAGE +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + Name of the serving endpoint to grant permission on. + "permission": + "description": |- + Permission to grant on the serving endpoint. Supported permissions are: "CAN_MANAGE", "CAN_QUERY", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_QUERY + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + Id of the SQL warehouse to grant permission on. + "permission": + "description": |- + Permission to grant on the SQL warehouse. Supported permissions are: "CAN_MANAGE", "CAN_USE", "IS_OWNER". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE + - |- + IS_OWNER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationState: + "_": + "enum": + - |- + DEPLOYING + - |- + RUNNING + - |- + CRASHED + - |- + UNAVAILABLE +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + Application status message + "state": + "description": |- + State of the application. +github.com/databricks/databricks-sdk-go/service/apps.ComputeState: + "_": + "enum": + - |- + ERROR + - |- + DELETING + - |- + STARTING + - |- + STOPPING + - |- + UPDATING + - |- + STOPPED + - |- + ACTIVE +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + Compute status message + "state": + "description": |- + State of the app compute. github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule: "pause_status": "description": |- @@ -775,6 +1012,10 @@ github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf: `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. + "volumes": + "description": |- + destination needs to be provided. e.g. 
+ `{ "volumes" : { "destination" : "/Volumes/catalog/schema/volume/cluster_log" } }` github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: "apply_policy_default_values": "description": |- @@ -800,7 +1041,7 @@ github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: "cluster_log_conf": "description": |- The configuration for delivering spark logs to a long-term storage destination. - Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + Three kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. @@ -1194,7 +1435,7 @@ github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo: github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo: "destination": "description": |- - Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` + Unity Catalog volumes file destination, e.g. `/Volumes/catalog/schema/volume/dir/file` github.com/databricks/databricks-sdk-go/service/compute.WorkloadType: "clients": "description": |2- @@ -1587,6 +1828,17 @@ github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus: UNPAUSED - |- PAUSED +github.com/databricks/databricks-sdk-go/service/jobs.PerformanceTarget: + "_": + "description": |- + PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be. + The performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager + (see cluster-common PerformanceTarget). + "enum": + - |- + PERFORMANCE_OPTIMIZED + - |- + COST_OPTIMIZED github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfiguration: "interval": "description": |- @@ -1748,6 +2000,9 @@ github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask: Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + "run_as_repl": + "description": |- + Deprecated. A value of `false` is no longer supported. github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask: "parameters": "description": |- @@ -2116,6 +2371,26 @@ github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag: github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: "quartz_cron_schedule": {} "timezone_id": {} +github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek: + "_": + "description": |- + Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). + If not specified all days of the week will be used. + "enum": + - |- + MONDAY + - |- + TUESDAY + - |- + WEDNESDAY + - |- + THURSDAY + - |- + FRIDAY + - |- + SATURDAY + - |- + SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind: "_": "description": | @@ -2375,26 +2650,18 @@ github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow: "description": |- Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. 
-github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek: +github.com/databricks/databricks-sdk-go/service/pipelines.RunAs: "_": "description": |- - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). - If not specified all days of the week will be used. - "enum": - - |- - MONDAY - - |- - TUESDAY - - |- - WEDNESDAY - - |- - THURSDAY - - |- - FRIDAY - - |- - SATURDAY - - |- - SUNDAY + Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline. + + Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. + "service_principal_name": + "description": |- + Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + "user_name": + "description": |- + The email of an active workspace user. Users can only set this field to their own email. github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec: "destination_catalog": "description": |- @@ -2458,27 +2725,36 @@ github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScd github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: "ai21labs_api_key": "description": |- - The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + The Databricks secret key reference for an AI21 Labs API key. If you + prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. + You must provide an API key using one of the following fields: + `ai21labs_api_key` or `ai21labs_api_key_plaintext`. "ai21labs_api_key_plaintext": "description": |- - An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + An AI21 Labs API key provided as a plaintext string. If you prefer to + reference your key using Databricks Secrets, see `ai21labs_api_key`. You + must provide an API key using one of the following fields: + `ai21labs_api_key` or `ai21labs_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayConfig: "guardrails": "description": |- Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. "inference_table_config": "description": |- - Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + Configuration for payload logging using inference tables. + Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. "rate_limits": "description": |- Configuration for rate limits which can be set to limit endpoint traffic. "usage_tracking_config": "description": |- - Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. + Configuration to enable usage tracking using system tables. + These tables allow you to monitor operational usage on endpoints and their associated costs. 
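As an illustration of how the AI Gateway annotations above could translate into bundle configuration, here is a hedged, partial sketch of a model serving endpoint with an `ai_gateway` block. The nested key names (`usage_tracking_config`, `inference_table_config`, `rate_limits`) are assumed from the AiGatewayConfig annotations; the endpoint's remaining required configuration is omitted:

```yaml
# Hypothetical, partial sketch — ai_gateway keys are assumed from the AiGatewayConfig,
# AiGatewayInferenceTableConfig, and AiGatewayRateLimit annotations above.
resources:
  model_serving_endpoints:
    my_endpoint:
      name: my-endpoint
      # ... served entities and other endpoint config omitted ...
      ai_gateway:
        usage_tracking_config:
          enabled: true            # operational usage logged to system tables
        inference_table_config:
          enabled: true
          catalog_name: main       # disable the inference table first to change this later
          schema_name: default
        rate_limits:
          - calls: 100             # calls allowed per renewal_period for the given key
            key: user              # 'user' or 'endpoint' ('endpoint' is the default)
            renewal_period: minute # only 'minute' is currently supported
```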
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParameters: "invalid_keywords": "description": |- - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + List of invalid keywords. + AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. "pii": "description": |- Configuration for guardrail PII filter. @@ -2487,15 +2763,14 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParame Indicates whether the safety filter is enabled. "valid_topics": "description": |- - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. + The list of allowed topics. + Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehavior: "behavior": "description": |- - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + Configuration for input guardrail filters. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehaviorBehavior: "_": - "description": |- - Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. "enum": - |- NONE @@ -2511,30 +2786,32 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrails: github.com/databricks/databricks-sdk-go/service/serving.AiGatewayInferenceTableConfig: "catalog_name": "description": |- - The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + The name of the catalog in Unity Catalog. Required when enabling inference tables. + NOTE: On update, you have to disable inference table first in order to change the catalog name. "enabled": "description": |- Indicates whether the inference table is enabled. "schema_name": "description": |- - The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + The name of the schema in Unity Catalog. Required when enabling inference tables. + NOTE: On update, you have to disable inference table first in order to change the schema name. "table_name_prefix": "description": |- - The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. + The prefix of the table in Unity Catalog. + NOTE: On update, you have to disable inference table first in order to change the prefix name. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimit: "calls": "description": |- Used to specify how many calls are allowed for a key within the renewal_period. 
"key": "description": |- - Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, + with 'endpoint' being the default if not specified. "renewal_period": "description": |- Renewal period field for a rate limit. Currently, only 'minute' is supported. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey: "_": - "description": |- - Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. "enum": - |- user @@ -2542,8 +2819,6 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey: endpoint github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitRenewalPeriod: "_": - "description": |- - Renewal period field for a rate limit. Currently, only 'minute' is supported. "enum": - |- minute @@ -2554,26 +2829,43 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayUsageTrackingCo github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig: "aws_access_key_id": "description": |- - The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + The Databricks secret key reference for an AWS access key ID with + permissions to interact with Bedrock services. If you prefer to paste + your API key directly, see `aws_access_key_id_plaintext`. You must provide an API + key using one of the following fields: `aws_access_key_id` or + `aws_access_key_id_plaintext`. "aws_access_key_id_plaintext": "description": |- - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + An AWS access key ID with permissions to interact with Bedrock services + provided as a plaintext string. If you prefer to reference your key using + Databricks Secrets, see `aws_access_key_id`. You must provide an API key + using one of the following fields: `aws_access_key_id` or + `aws_access_key_id_plaintext`. "aws_region": "description": |- The AWS region to use. Bedrock has to be enabled there. "aws_secret_access_key": "description": |- - The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + The Databricks secret key reference for an AWS secret access key paired + with the access key ID, with permissions to interact with Bedrock + services. If you prefer to paste your API key directly, see + `aws_secret_access_key_plaintext`. You must provide an API key using one + of the following fields: `aws_secret_access_key` or + `aws_secret_access_key_plaintext`. 
"aws_secret_access_key_plaintext": "description": |- - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + An AWS secret access key paired with the access key ID, with permissions + to interact with Bedrock services provided as a plaintext string. If you + prefer to reference your key using Databricks Secrets, see + `aws_secret_access_key`. You must provide an API key using one of the + following fields: `aws_secret_access_key` or + `aws_secret_access_key_plaintext`. "bedrock_provider": "description": |- - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. + The underlying provider in Amazon Bedrock. Supported values (case + insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider: "_": - "description": |- - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. "enum": - |- anthropic @@ -2586,10 +2878,16 @@ github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedro github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig: "anthropic_api_key": "description": |- - The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + The Databricks secret key reference for an Anthropic API key. If you + prefer to paste your API key directly, see `anthropic_api_key_plaintext`. + You must provide an API key using one of the following fields: + `anthropic_api_key` or `anthropic_api_key_plaintext`. "anthropic_api_key_plaintext": "description": |- - The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + The Anthropic API key provided as a plaintext string. If you prefer to + reference your key using Databricks Secrets, see `anthropic_api_key`. You + must provide an API key using one of the following fields: + `anthropic_api_key` or `anthropic_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput: "catalog_name": "description": |- @@ -2605,42 +2903,58 @@ github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput: The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. github.com/databricks/databricks-sdk-go/service/serving.CohereConfig: "cohere_api_base": - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" + "description": |- + This is an optional field to provide a customized base URL for the Cohere + API. If left unspecified, the standard Cohere base URL is used. "cohere_api_key": "description": |- - The Databricks secret key reference for a Cohere API key. 
If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + The Databricks secret key reference for a Cohere API key. If you prefer + to paste your API key directly, see `cohere_api_key_plaintext`. You must + provide an API key using one of the following fields: `cohere_api_key` or + `cohere_api_key_plaintext`. "cohere_api_key_plaintext": "description": |- - The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + The Cohere API key provided as a plaintext string. If you prefer to + reference your key using Databricks Secrets, see `cohere_api_key`. You + must provide an API key using one of the following fields: + `cohere_api_key` or `cohere_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig: "databricks_api_token": - "description": | - The Databricks secret key reference for a Databricks API token that corresponds to a user or service - principal with Can Query access to the model serving endpoint pointed to by this external model. - If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. - You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + "description": |- + The Databricks secret key reference for a Databricks API token that + corresponds to a user or service principal with Can Query access to the + model serving endpoint pointed to by this external model. If you prefer + to paste your API key directly, see `databricks_api_token_plaintext`. You + must provide an API key using one of the following fields: + `databricks_api_token` or `databricks_api_token_plaintext`. "databricks_api_token_plaintext": - "description": | - The Databricks API token that corresponds to a user or service - principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. - If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. - You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. + "description": |- + The Databricks API token that corresponds to a user or service principal + with Can Query access to the model serving endpoint pointed to by this + external model provided as a plaintext string. If you prefer to reference + your key using Databricks Secrets, see `databricks_api_token`. You must + provide an API key using one of the following fields: + `databricks_api_token` or `databricks_api_token_plaintext`. "databricks_workspace_url": - "description": | - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. + "description": |- + The URL of the Databricks workspace containing the model serving endpoint + pointed to by this external model. github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput: "auto_capture_config": "description": |- Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. 
+ Note: this field is deprecated for creating new provisioned throughput endpoints, + or updating existing provisioned throughput endpoints that never have inference table configured; + in these cases please use AI Gateway to manage inference tables. "served_entities": "description": |- - A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. + The list of served entities under the serving endpoint config. "served_models": "description": |- - (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. + (Deprecated, use served_entities instead) The list of served models under the serving endpoint config. "traffic_config": "description": |- - The traffic config defining how invocations to the serving endpoint should be routed. + The traffic configuration associated with the serving endpoint config. github.com/databricks/databricks-sdk-go/service/serving.EndpointTag: "key": "description": |- @@ -2677,17 +2991,13 @@ github.com/databricks/databricks-sdk-go/service/serving.ExternalModel: "description": |- PaLM Config. Only required if the provider is 'palm'. "provider": - "description": | - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', - 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.", + "description": |- + The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'. "task": "description": |- The task type of the external model. github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider: "_": - "description": | - The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', - 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.", "enum": - |- ai21labs @@ -2708,70 +3018,114 @@ github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider: github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: "private_key": "description": |- - The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` + The Databricks secret key reference for a private key for the service + account which has access to the Google Cloud Vertex AI Service. See [Best + practices for managing service account keys]. If you prefer to paste your + API key directly, see `private_key_plaintext`. You must provide an API + key using one of the following fields: `private_key` or + `private_key_plaintext` + + [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys "private_key_plaintext": "description": |- - The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. 
See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. + The private key for the service account which has access to the Google + Cloud Vertex AI Service provided as a plaintext secret. See [Best + practices for managing service account keys]. If you prefer to reference + your key using Databricks Secrets, see `private_key`. You must provide an + API key using one of the following fields: `private_key` or + `private_key_plaintext`. + + [Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys "project_id": "description": |- - This is the Google Cloud project id that the service account is associated with. + This is the Google Cloud project id that the service account is + associated with. "region": "description": |- - This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions. + This is the region for the Google Cloud Vertex AI Service. See [supported + regions] for more details. Some models are only available in specific + regions. + + [supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig: + "_": + "description": |- + Configs needed to create an OpenAI model route. "microsoft_entra_client_id": - "description": | - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. + "description": |- + This field is only required for Azure AD OpenAI and is the Microsoft + Entra Client ID. "microsoft_entra_client_secret": - "description": | - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. - If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. - You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + "description": |- + The Databricks secret key reference for a client secret used for + Microsoft Entra ID authentication. If you prefer to paste your client + secret directly, see `microsoft_entra_client_secret_plaintext`. You must + provide an API key using one of the following fields: + `microsoft_entra_client_secret` or + `microsoft_entra_client_secret_plaintext`. "microsoft_entra_client_secret_plaintext": - "description": | - The client secret used for Microsoft Entra ID authentication provided as a plaintext string. - If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. - You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + "description": |- + The client secret used for Microsoft Entra ID authentication provided as + a plaintext string. If you prefer to reference your key using Databricks + Secrets, see `microsoft_entra_client_secret`. You must provide an API key + using one of the following fields: `microsoft_entra_client_secret` or + `microsoft_entra_client_secret_plaintext`. 
"microsoft_entra_tenant_id": - "description": | - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. + "description": |- + This field is only required for Azure AD OpenAI and is the Microsoft + Entra Tenant ID. "openai_api_base": - "description": | - This is a field to provide a customized base URl for the OpenAI API. - For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service - provided by Azure. - For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. + "description": |- + This is a field to provide a customized base URl for the OpenAI API. For + Azure OpenAI, this field is required, and is the base URL for the Azure + OpenAI API service provided by Azure. For other OpenAI API types, this + field is optional, and if left unspecified, the standard OpenAI base URL + is used. "openai_api_key": "description": |- - The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + The Databricks secret key reference for an OpenAI API key using the + OpenAI or Azure service. If you prefer to paste your API key directly, + see `openai_api_key_plaintext`. You must provide an API key using one of + the following fields: `openai_api_key` or `openai_api_key_plaintext`. "openai_api_key_plaintext": "description": |- - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + The OpenAI API key using the OpenAI or Azure service provided as a + plaintext string. If you prefer to reference your key using Databricks + Secrets, see `openai_api_key`. You must provide an API key using one of + the following fields: `openai_api_key` or `openai_api_key_plaintext`. "openai_api_type": - "description": | - This is an optional field to specify the type of OpenAI API to use. - For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security - access validation protocol. For access token validation, use azure. For authentication using Azure Active + "description": |- + This is an optional field to specify the type of OpenAI API to use. For + Azure OpenAI, this field is required, and adjust this parameter to + represent the preferred security access validation protocol. For access + token validation, use azure. For authentication using Azure Active Directory (Azure AD) use, azuread. "openai_api_version": - "description": | - This is an optional field to specify the OpenAI API version. - For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to - utilize, specified by a date. + "description": |- + This is an optional field to specify the OpenAI API version. For Azure + OpenAI, this field is required, and is the version of the Azure OpenAI + service to utilize, specified by a date. "openai_deployment_name": - "description": | - This field is only required for Azure OpenAI and is the name of the deployment resource for the - Azure OpenAI service. + "description": |- + This field is only required for Azure OpenAI and is the name of the + deployment resource for the Azure OpenAI service. 
"openai_organization": - "description": | - This is an optional field to specify the organization in OpenAI or Azure OpenAI. + "description": |- + This is an optional field to specify the organization in OpenAI or Azure + OpenAI. github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig: "palm_api_key": "description": |- - The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + The Databricks secret key reference for a PaLM API key. If you prefer to + paste your API key directly, see `palm_api_key_plaintext`. You must + provide an API key using one of the following fields: `palm_api_key` or + `palm_api_key_plaintext`. "palm_api_key_plaintext": "description": |- - The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + The PaLM API key provided as a plaintext string. If you prefer to + reference your key using Databricks Secrets, see `palm_api_key`. You must + provide an API key using one of the following fields: `palm_api_key` or + `palm_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.RateLimit: "calls": "description": |- @@ -2784,8 +3138,6 @@ github.com/databricks/databricks-sdk-go/service/serving.RateLimit: Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey: "_": - "description": |- - Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. "enum": - |- user @@ -2793,8 +3145,6 @@ github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey: endpoint github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod: "_": - "description": |- - Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. "enum": - |- minute @@ -2807,21 +3157,15 @@ github.com/databricks/databricks-sdk-go/service/serving.Route: The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: "entity_name": - "description": | - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), - or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of - __catalog_name__.__schema_name__.__model_name__. - "entity_version": "description": |- - The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC. + The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**. + "entity_version": {} "environment_vars": - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. 
\nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" + "description": |- + An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` "external_model": - "description": | - The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) - can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, - it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. - The task type of all external models within an endpoint must be the same. + "description": |- + The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. "instance_profile_arn": "description": |- ARN of the instance profile that the served entity uses to access AWS resources. @@ -2832,68 +3176,46 @@ github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: "description": |- The minimum tokens per second that the endpoint can scale down to. "name": - "description": | - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. - If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other - entities, it defaults to -. + "description": |- + The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. "scale_to_zero_enabled": "description": |- Whether the compute resources for the served entity should scale down to zero. "workload_size": - "description": | - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. - A single unit of provisioned concurrency can process one request at a time. - Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). - If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. + "description": |- + The workload size of the served entity. 
The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. "workload_type": - "description": | - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is - "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. - See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + "description": |- + The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput: "environment_vars": - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" + "description": |- + An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}` "instance_profile_arn": "description": |- - ARN of the instance profile that the served model will use to access AWS resources. + ARN of the instance profile that the served entity uses to access AWS resources. "max_provisioned_throughput": "description": |- The maximum tokens per second that the endpoint can scale up to. "min_provisioned_throughput": "description": |- The minimum tokens per second that the endpoint can scale down to. - "model_name": - "description": | - The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model, - in the form of __catalog_name__.__schema_name__.__model_name__. - "model_version": - "description": |- - The version of the model in Databricks Model Registry or Unity Catalog to be served. + "model_name": {} + "model_version": {} "name": - "description": | - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to -. - A served model name can consist of alphanumeric characters, dashes, and underscores. + "description": |- + The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' 
and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version. "scale_to_zero_enabled": "description": |- - Whether the compute resources for the served model should scale down to zero. + Whether the compute resources for the served entity should scale down to zero. "workload_size": - "description": | - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. - A single unit of provisioned concurrency can process one request at a time. - Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). - If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. + "description": |- + The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. "workload_type": - "description": | - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is - "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. - See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). + "description": |- + The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize: "_": - "description": | - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. - A single unit of provisioned concurrency can process one request at a time. - Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). - If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. "enum": - |- Small @@ -2903,17 +3225,26 @@ github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkload Large github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType: "_": - "description": | - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is - "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. - See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). 
"enum": - |- CPU + - |- + GPU_MEDIUM - |- GPU_SMALL + - |- + GPU_LARGE + - |- + MULTIGPU_MEDIUM +github.com/databricks/databricks-sdk-go/service/serving.ServingModelWorkloadType: + "_": + "enum": + - |- + CPU - |- GPU_MEDIUM + - |- + GPU_SMALL - |- GPU_LARGE - |- diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index ef602d6ef..be83af2d1 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -1,4 +1,91 @@ +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + PLACEHOLDER + "app_status": + "description": |- + PLACEHOLDER + "compute_status": + "description": |- + PLACEHOLDER + "config": + "description": |- + PLACEHOLDER + "create_time": + "description": |- + PLACEHOLDER + "creator": + "description": |- + PLACEHOLDER + "default_source_code_path": + "description": |- + PLACEHOLDER + "description": + "description": |- + PLACEHOLDER + "name": + "description": |- + PLACEHOLDER + "pending_deployment": + "description": |- + PLACEHOLDER + "permissions": + "description": |- + PLACEHOLDER + "resources": + "description": |- + PLACEHOLDER + "service_principal_client_id": + "description": |- + PLACEHOLDER + "service_principal_id": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER + "update_time": + "description": |- + PLACEHOLDER + "updater": + "description": |- + PLACEHOLDER + "url": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Cluster: + "_": + "markdown_description": |- + The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). + "markdown_examples": |- + The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: + + ```yaml + bundle: + name: clusters + + resources: + clusters: + my_cluster: + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + + jobs: + my_job: + tasks: + - task_key: test_task + notebook_task: + notebook_path: "./src/my_notebook.py" + ``` "data_security_mode": "description": |- PLACEHOLDER @@ -18,6 +105,23 @@ github.com/databricks/cli/bundle/config/resources.Cluster: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Dashboard: + "_": + "markdown_description": |- + The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). + "markdown_examples": |- + The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. + + ``` yaml + resources: + dashboards: + nyc_taxi_trip_analysis: + display_name: "NYC Taxi Trip Analysis" + file_path: ../src/nyc_taxi_trip_analysis.lvdash.json + warehouse_id: ${var.warehouse_id} + ``` + If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). 
+ + In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). "embed_credentials": "description": |- PLACEHOLDER @@ -28,6 +132,24 @@ github.com/databricks/cli/bundle/config/resources.Dashboard: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Job: + "_": + "markdown_description": |- + The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). + "markdown_examples": |- + The following example defines a job with the resource key `hello-job` with one notebook task: + + ```yaml + resources: + jobs: + hello-job: + name: hello-job + tasks: + - task_key: hello-task + notebook_task: + notebook_path: ./hello.py + ``` + + For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). "health": "description": |- PLACEHOLDER @@ -38,30 +160,192 @@ github.com/databricks/cli/bundle/config/resources.Job: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.MlflowExperiment: + "_": + "markdown_description": |- + The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). + "markdown_examples": |- + The following example defines an experiment that all users can view: + + ```yaml + resources: + experiments: + experiment: + name: my_ml_experiment + permissions: + - level: CAN_READ + group_name: users + description: MLflow experiment used to track runs + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.MlflowModel: + "_": + "markdown_description": |- + The model resource allows you to define [legacy models](/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use [registered models](#registered-model) instead. "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: + "_": + "markdown_description": |- + The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). + "markdown_examples": |- + The following example defines a model serving endpoint: + + ```yaml + resources: + model_serving_endpoints: + uc_model_serving_endpoint: + name: "uc-model-endpoint" + config: + served_entities: + - entity_name: "myCatalog.mySchema.my-ads-model" + entity_version: "10" + workload_size: "Small" + scale_to_zero_enabled: "true" + traffic_config: + routes: + - served_model_name: "my-ads-model-10" + traffic_percentage: "100" + tags: + - key: "team" + value: "data science" + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Pipeline: + "_": + "markdown_description": |- + The pipeline resource allows you to create [pipelines](/api/workspace/pipelines/create). 
For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + "markdown_examples": |- + The following example defines a pipeline with the resource key `hello-pipeline`: + + ```yaml + resources: + pipelines: + hello-pipeline: + name: hello-pipeline + clusters: + - label: default + num_workers: 1 + development: true + continuous: false + channel: CURRENT + edition: CORE + photon: false + libraries: + - notebook: + path: ./pipeline.py + ``` + "dry_run": + "description": |- + PLACEHOLDER "permissions": "description": |- PLACEHOLDER + "run_as": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.QualityMonitor: + "_": + "markdown_description": |- + The quality_monitor resource allows you to define a [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). + "markdown_examples": |- + The following example defines a quality monitor: + + ```yaml + resources: + quality_monitors: + my_quality_monitor: + table_name: dev.mlops_schema.predictions + output_schema_name: ${bundle.target}.mlops_schema + assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring + inference_log: + granularities: [1 day] + model_id_col: model_id + prediction_col: prediction + label_col: price + problem_type: PROBLEM_TYPE_REGRESSION + timestamp_col: timestamp + schedule: + quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am + timezone_id: UTC + ``` "table_name": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.RegisteredModel: + "_": + "markdown_description": |- + The registered model resource allows you to define models in Unity Catalog. For information about [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). + "markdown_examples": |- + The following example defines a registered model in Unity Catalog: + + ```yaml + resources: + registered_models: + model: + name: my_model + catalog_name: ${bundle.target} + schema_name: mlops_schema + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE + principal: account users + ``` "grants": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Schema: + "_": + "markdown_description": |- + The schema resource type allows you to define [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + + - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. + - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update).
+ "markdown_examples": |- + The following example defines a pipeline with the resource key `my_pipeline` that creates a schema with the key `my_schema` as the target: + + ```yaml + resources: + pipelines: + my_pipeline: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: main + target: ${resources.schemas.my_schema.id} + + schemas: + my_schema: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created by DABs. + ``` + + A top-level grants mapping is not supported by , so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). + + The following example defines a schema with grants: + + ```yaml + resources: + schemas: + my_schema: + name: test-schema + grants: + - principal: users + privileges: + - CAN_MANAGE + - principal: my_team + privileges: + - CAN_READ + catalog_name: main + ``` "grants": "description": |- PLACEHOLDER @@ -69,12 +353,124 @@ github.com/databricks/cli/bundle/config/resources.Schema: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Volume: + "_": + "markdown_description": |- + The volume resource type allows you to define and create [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + + - A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. + + - Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). + "markdown_examples": |- + The following example creates a volume with the key `my_volume`: + + ```yaml + resources: + volumes: + my_volume: + catalog_name: main + name: my_volume + schema_name: my_schema + ``` + + For an example bundle that runs a job that writes to a file in volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). 
"grants": "description": |- PLACEHOLDER "volume_type": "description": |- PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + PLACEHOLDER + "creator": + "description": |- + PLACEHOLDER + "deployment_artifacts": + "description": |- + PLACEHOLDER + "deployment_id": + "description": |- + PLACEHOLDER + "mode": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER + "status": + "description": |- + PLACEHOLDER + "update_time": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + PLACEHOLDER + "state": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "job": + "description": |- + PLACEHOLDER + "secret": + "description": |- + PLACEHOLDER + "serving_endpoint": + "description": |- + PLACEHOLDER + "sql_warehouse": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER + "scope": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + PLACEHOLDER + "state": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + PLACEHOLDER + "state": {} github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: "availability": "description": |- @@ -159,3 +555,14 @@ github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger: "manual": "description": |- PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: + "entity_version": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput: + "model_name": + "description": |- + PLACEHOLDER + "model_version": + "description": |- + PLACEHOLDER diff --git a/bundle/internal/schema/annotations_test.go b/bundle/internal/schema/annotations_test.go index d7e2fea7c..0e1593359 100644 --- a/bundle/internal/schema/annotations_test.go +++ b/bundle/internal/schema/annotations_test.go @@ -33,6 +33,10 @@ func TestConvertLinksToAbsoluteUrl(t *testing.T) { input: "This is a link to [external](https://external.com)", expected: "This is a link to [external](https://external.com)", }, + { + input: "This is a link to [one](/relative), [two](/relative-2)", + expected: "This is a link to [one](https://docs.databricks.com/relative), [two](https://docs.databricks.com/relative-2)", + }, } for _, test := range tests { diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 77927a966..2e0120e62 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -40,6 +40,19 
@@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema. } } + // Allows using variables in enum fields + if s.Type == jsonschema.StringType && s.Enum != nil { + return jsonschema.Schema{ + OneOf: []jsonschema.Schema{ + s, + { + Type: jsonschema.StringType, + Pattern: interpolationPattern("var"), + }, + }, + } + } + switch s.Type { case jsonschema.ArrayType, jsonschema.ObjectType: // arrays and objects can have complex variable values specified. @@ -96,6 +109,20 @@ func removeJobsFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { return s } +func removePipelineFields(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema { + switch typ { + case reflect.TypeOf(resources.Pipeline{}): + // Even though DABs supports this field, TF provider does not. Thus, we + // should not expose it to the user. + delete(s.Properties, "dry_run") + delete(s.Properties, "allow_duplicate_names") + default: + // Do nothing + } + + return s +} + // While volume_type is required in the volume create API, DABs automatically sets // it's value to "MANAGED" if it's not provided. Thus, we make it optional // in the bundle schema. @@ -155,10 +182,20 @@ func generateSchema(workdir, outputFile string) { // Generate the JSON schema from the bundle Go struct. s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ removeJobsFields, + removePipelineFields, makeVolumeTypeOptional, a.addAnnotations, addInterpolationPatterns, }) + + // AdditionalProperties is set to an empty schema to allow non-typed keys used as yaml-anchors + // Example: + // some_anchor: &some_anchor + // file_path: /some/path/ + // workspace: + // <<: *some_anchor + s.AdditionalProperties = jsonschema.Schema{} + if err != nil { log.Fatal(err) } diff --git a/bundle/internal/schema/main_test.go b/bundle/internal/schema/main_test.go index 06e89c856..620f1cb70 100644 --- a/bundle/internal/schema/main_test.go +++ b/bundle/internal/schema/main_test.go @@ -10,6 +10,7 @@ import ( "testing" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/internal/annotation" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/libs/dyn/yamlloader" @@ -113,13 +114,33 @@ func TestNoDetachedAnnotations(t *testing.T) { assert.Empty(t, types, "Detached annotations found, regenerate schema and check for package path changes") } -func getAnnotations(path string) (annotationFile, error) { +func getAnnotations(path string) (annotation.File, error) { b, err := os.ReadFile(path) if err != nil { return nil, err } - var data annotationFile + var data annotation.File err = yaml.Unmarshal(b, &data) return data, err } + +func TestNoDuplicatedAnnotations(t *testing.T) { + // Check for duplicated annotations in annotation files + files := []string{ + "annotations_openapi_overrides.yml", + "annotations.yml", + } + + annotations := map[string]bool{} + for _, file := range files { + annotationsFile, err := getAnnotations(file) + assert.NoError(t, err) + for k := range annotationsFile { + if _, ok := annotations[k]; ok { + t.Errorf("Annotation `%s` is duplicated in %s", k, file) + } + annotations[k] = true + } + } +} diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go index 919908429..ca8c27d4c 100644 --- a/bundle/internal/schema/parser.go +++ b/bundle/internal/schema/parser.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "encoding/json" "fmt" "os" @@ -8,8 +9,10 @@ import ( "reflect" 
"strings" + "github.com/databricks/cli/bundle/internal/annotation" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/yamlloader" "github.com/databricks/cli/libs/jsonschema" - "gopkg.in/yaml.v3" ) type Components struct { @@ -114,19 +117,23 @@ func mapIncorrectTypNames(ref string) string { // Use the OpenAPI spec to load descriptions for the given type. func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error { - annotations := annotationFile{} - overrides := annotationFile{} + annotations := annotation.File{} + overrides := annotation.File{} b, err := os.ReadFile(overridesPath) if err != nil { return err } - err = yaml.Unmarshal(b, &overrides) + overridesDyn, err := yamlloader.LoadYAML(overridesPath, bytes.NewBuffer(b)) + if err != nil { + return err + } + err = convert.ToTyped(&overrides, overridesDyn) if err != nil { return err } if overrides == nil { - overrides = annotationFile{} + overrides = annotation.File{} } _, err = jsonschema.FromType(typ, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{ @@ -137,16 +144,16 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid } basePath := getPath(typ) - pkg := map[string]annotation{} + pkg := map[string]annotation.Descriptor{} annotations[basePath] = pkg if ref.Description != "" || ref.Enum != nil { - pkg[RootTypeKey] = annotation{Description: ref.Description, Enum: ref.Enum} + pkg[RootTypeKey] = annotation.Descriptor{Description: ref.Description, Enum: ref.Enum} } for k := range s.Properties { if refProp, ok := ref.Properties[k]; ok { - pkg[k] = annotation{Description: refProp.Description, Enum: refProp.Enum} + pkg[k] = annotation.Descriptor{Description: refProp.Description, Enum: refProp.Enum} if refProp.Description == "" { addEmptyOverride(k, basePath, overrides) } @@ -195,22 +202,22 @@ func prependCommentToFile(outputPath, comment string) error { return err } -func addEmptyOverride(key, pkg string, overridesFile annotationFile) { +func addEmptyOverride(key, pkg string, overridesFile annotation.File) { if overridesFile[pkg] == nil { - overridesFile[pkg] = map[string]annotation{} + overridesFile[pkg] = map[string]annotation.Descriptor{} } overrides := overridesFile[pkg] if overrides[key].Description == "" { - overrides[key] = annotation{Description: Placeholder} + overrides[key] = annotation.Descriptor{Description: annotation.Placeholder} } a, ok := overrides[key] if !ok { - a = annotation{} + a = annotation.Descriptor{} } if a.Description == "" { - a.Description = Placeholder + a.Description = annotation.Placeholder } overrides[key] = a } diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml deleted file mode 100644 index e8a8866bc..000000000 --- a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml +++ /dev/null @@ -1 +0,0 @@ -unknown: value diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml index e13a52c03..ec447ba39 100644 --- a/bundle/internal/schema/testdata/pass/job.yml +++ b/bundle/internal/schema/testdata/pass/job.yml @@ -13,6 +13,8 @@ variables: simplevar: default: true description: "simplevar description" + schedule_status: + default: "PAUSED" complexvar: default: @@ -42,6 +44,8 @@ resources: dependencies: - python=3.7 client: "myclient" + trigger: + pause_status: ${var.schedule_status} tags: foo: bar bar: baz diff --git 
a/bundle/internal/schema/testdata/pass/yaml_anchors.yml b/bundle/internal/schema/testdata/pass/yaml_anchors.yml new file mode 100644 index 000000000..18749891d --- /dev/null +++ b/bundle/internal/schema/testdata/pass/yaml_anchors.yml @@ -0,0 +1,11 @@ +tags: &job-tags + environment: "some_environment" + +resources: + jobs: + db1: + tags: + <<: *job-tags + db2: + tags: + <<: *job-tags diff --git a/bundle/internal/tf/codegen/README.md b/bundle/internal/tf/codegen/README.md index b1f8a33a8..968bf29ed 100644 --- a/bundle/internal/tf/codegen/README.md +++ b/bundle/internal/tf/codegen/README.md @@ -19,3 +19,6 @@ How to regenerate Go structs from an updated terraform provider? 2. Delete `./tmp` if it exists 3. Run `go run .` 4. Run `gofmt -s -w ../schema` +5. Go back to the root of the repo. +6. Update `/acceptance/terraform/main.tf` file to use new version of TF provider +7. Run `go test ./acceptance -v -update -run TestAccept/terraform` to update test output with a new version of TF provider diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 27c4b16cd..46548f3e8 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.65.1" diff --git a/bundle/internal/tf/schema/data_source_catalog.go b/bundle/internal/tf/schema/data_source_catalog.go index 6f9237cfa..4b8c6df97 100644 --- a/bundle/internal/tf/schema/data_source_catalog.go +++ b/bundle/internal/tf/schema/data_source_catalog.go @@ -28,7 +28,6 @@ type DataSourceCatalogCatalogInfo struct { Owner string `json:"owner,omitempty"` Properties map[string]string `json:"properties,omitempty"` ProviderName string `json:"provider_name,omitempty"` - SecurableKind string `json:"securable_kind,omitempty"` SecurableType string `json:"securable_type,omitempty"` ShareName string `json:"share_name,omitempty"` StorageLocation string `json:"storage_location,omitempty"` diff --git a/bundle/internal/tf/schema/data_source_serving_endpoints.go b/bundle/internal/tf/schema/data_source_serving_endpoints.go index bdfd778e0..973989216 100644 --- a/bundle/internal/tf/schema/data_source_serving_endpoints.go +++ b/bundle/internal/tf/schema/data_source_serving_endpoints.go @@ -3,7 +3,7 @@ package schema type DataSourceServingEndpointsEndpointsAiGatewayGuardrailsInputPii struct { - Behavior string `json:"behavior"` + Behavior string `json:"behavior,omitempty"` } type DataSourceServingEndpointsEndpointsAiGatewayGuardrailsInput struct { @@ -14,7 +14,7 @@ type DataSourceServingEndpointsEndpointsAiGatewayGuardrailsInput struct { } type DataSourceServingEndpointsEndpointsAiGatewayGuardrailsOutputPii struct { - Behavior string `json:"behavior"` + Behavior string `json:"behavior,omitempty"` } type DataSourceServingEndpointsEndpointsAiGatewayGuardrailsOutput struct { @@ -87,8 +87,8 @@ type DataSourceServingEndpointsEndpointsConfigServedEntitiesExternalModelDatabri type DataSourceServingEndpointsEndpointsConfigServedEntitiesExternalModelGoogleCloudVertexAiConfig struct { PrivateKey string `json:"private_key,omitempty"` PrivateKeyPlaintext string `json:"private_key_plaintext,omitempty"` - ProjectId string `json:"project_id,omitempty"` - Region string `json:"region,omitempty"` + ProjectId string `json:"project_id"` + Region string `json:"region"` } type DataSourceServingEndpointsEndpointsConfigServedEntitiesExternalModelOpenaiConfig struct { diff --git 
a/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_access_policy_setting.go b/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_access_policy_setting.go new file mode 100644 index 000000000..d816b235d --- /dev/null +++ b/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_access_policy_setting.go @@ -0,0 +1,14 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type ResourceAibiDashboardEmbeddingAccessPolicySettingAibiDashboardEmbeddingAccessPolicy struct { + AccessPolicyType string `json:"access_policy_type"` +} + +type ResourceAibiDashboardEmbeddingAccessPolicySetting struct { + Etag string `json:"etag,omitempty"` + Id string `json:"id,omitempty"` + SettingName string `json:"setting_name,omitempty"` + AibiDashboardEmbeddingAccessPolicy *ResourceAibiDashboardEmbeddingAccessPolicySettingAibiDashboardEmbeddingAccessPolicy `json:"aibi_dashboard_embedding_access_policy,omitempty"` +} diff --git a/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_approved_domains_setting.go b/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_approved_domains_setting.go new file mode 100644 index 000000000..690b334cd --- /dev/null +++ b/bundle/internal/tf/schema/resource_aibi_dashboard_embedding_approved_domains_setting.go @@ -0,0 +1,14 @@ +// Generated from Databricks Terraform provider schema. DO NOT EDIT. + +package schema + +type ResourceAibiDashboardEmbeddingApprovedDomainsSettingAibiDashboardEmbeddingApprovedDomains struct { + ApprovedDomains []string `json:"approved_domains"` +} + +type ResourceAibiDashboardEmbeddingApprovedDomainsSetting struct { + Etag string `json:"etag,omitempty"` + Id string `json:"id,omitempty"` + SettingName string `json:"setting_name,omitempty"` + AibiDashboardEmbeddingApprovedDomains *ResourceAibiDashboardEmbeddingApprovedDomainsSettingAibiDashboardEmbeddingApprovedDomains `json:"aibi_dashboard_embedding_approved_domains,omitempty"` +} diff --git a/bundle/internal/tf/schema/resource_app.go b/bundle/internal/tf/schema/resource_app.go index 14c93b793..cbce5ab0e 100644 --- a/bundle/internal/tf/schema/resource_app.go +++ b/bundle/internal/tf/schema/resource_app.go @@ -91,6 +91,7 @@ type ResourceApp struct { DefaultSourceCodePath string `json:"default_source_code_path,omitempty"` Description string `json:"description,omitempty"` Name string `json:"name"` + NoCompute bool `json:"no_compute,omitempty"` PendingDeployment *ResourceAppPendingDeployment `json:"pending_deployment,omitempty"` Resources []ResourceAppResources `json:"resources,omitempty"` ServicePrincipalClientId string `json:"service_principal_client_id,omitempty"` diff --git a/bundle/internal/tf/schema/resource_custom_app_integration.go b/bundle/internal/tf/schema/resource_custom_app_integration.go index e89eb7fe5..0a964f6ab 100644 --- a/bundle/internal/tf/schema/resource_custom_app_integration.go +++ b/bundle/internal/tf/schema/resource_custom_app_integration.go @@ -8,16 +8,17 @@ type ResourceCustomAppIntegrationTokenAccessPolicy struct { } type ResourceCustomAppIntegration struct { - ClientId string `json:"client_id,omitempty"` - ClientSecret string `json:"client_secret,omitempty"` - Confidential bool `json:"confidential,omitempty"` - CreateTime string `json:"create_time,omitempty"` - CreatedBy int `json:"created_by,omitempty"` - CreatorUsername string `json:"creator_username,omitempty"` - Id string `json:"id,omitempty"` - IntegrationId string `json:"integration_id,omitempty"` - Name string `json:"name,omitempty"` - RedirectUrls 
[]string `json:"redirect_urls,omitempty"` - Scopes []string `json:"scopes,omitempty"` - TokenAccessPolicy *ResourceCustomAppIntegrationTokenAccessPolicy `json:"token_access_policy,omitempty"` + ClientId string `json:"client_id,omitempty"` + ClientSecret string `json:"client_secret,omitempty"` + Confidential bool `json:"confidential,omitempty"` + CreateTime string `json:"create_time,omitempty"` + CreatedBy int `json:"created_by,omitempty"` + CreatorUsername string `json:"creator_username,omitempty"` + Id string `json:"id,omitempty"` + IntegrationId string `json:"integration_id,omitempty"` + Name string `json:"name,omitempty"` + RedirectUrls []string `json:"redirect_urls,omitempty"` + Scopes []string `json:"scopes,omitempty"` + UserAuthorizedScopes []string `json:"user_authorized_scopes,omitempty"` + TokenAccessPolicy *ResourceCustomAppIntegrationTokenAccessPolicy `json:"token_access_policy,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_external_location.go b/bundle/internal/tf/schema/resource_external_location.go index da28271bc..72411f4dc 100644 --- a/bundle/internal/tf/schema/resource_external_location.go +++ b/bundle/internal/tf/schema/resource_external_location.go @@ -13,8 +13,13 @@ type ResourceExternalLocationEncryptionDetails struct { type ResourceExternalLocation struct { AccessPoint string `json:"access_point,omitempty"` + BrowseOnly bool `json:"browse_only,omitempty"` Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name"` + Fallback bool `json:"fallback,omitempty"` ForceDestroy bool `json:"force_destroy,omitempty"` ForceUpdate bool `json:"force_update,omitempty"` Id string `json:"id,omitempty"` @@ -24,6 +29,8 @@ type ResourceExternalLocation struct { Owner string `json:"owner,omitempty"` ReadOnly bool `json:"read_only,omitempty"` SkipValidation bool `json:"skip_validation,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` Url string `json:"url"` EncryptionDetails *ResourceExternalLocationEncryptionDetails `json:"encryption_details,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_job.go b/bundle/internal/tf/schema/resource_job.go index 63c8aeb7b..2c27f0be7 100644 --- a/bundle/internal/tf/schema/resource_job.go +++ b/bundle/internal/tf/schema/resource_job.go @@ -904,6 +904,7 @@ type ResourceJobTaskForEachTaskTaskSparkJarTask struct { JarUri string `json:"jar_uri,omitempty"` MainClassName string `json:"main_class_name,omitempty"` Parameters []string `json:"parameters,omitempty"` + RunAsRepl bool `json:"run_as_repl,omitempty"` } type ResourceJobTaskForEachTaskTaskSparkPythonTask struct { @@ -1299,6 +1300,7 @@ type ResourceJobTaskSparkJarTask struct { JarUri string `json:"jar_uri,omitempty"` MainClassName string `json:"main_class_name,omitempty"` Parameters []string `json:"parameters,omitempty"` + RunAsRepl bool `json:"run_as_repl,omitempty"` } type ResourceJobTaskSparkPythonTask struct { @@ -1487,6 +1489,7 @@ type ResourceJob struct { MaxRetries int `json:"max_retries,omitempty"` MinRetryIntervalMillis int `json:"min_retry_interval_millis,omitempty"` Name string `json:"name,omitempty"` + PerformanceTarget string `json:"performance_target,omitempty"` RetryOnTimeout bool `json:"retry_on_timeout,omitempty"` Tags map[string]string `json:"tags,omitempty"` TimeoutSeconds int `json:"timeout_seconds,omitempty"` diff --git 
a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index 71cf8925d..2025de34c 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -3,7 +3,7 @@ package schema type ResourceModelServingAiGatewayGuardrailsInputPii struct { - Behavior string `json:"behavior"` + Behavior string `json:"behavior,omitempty"` } type ResourceModelServingAiGatewayGuardrailsInput struct { @@ -14,7 +14,7 @@ type ResourceModelServingAiGatewayGuardrailsInput struct { } type ResourceModelServingAiGatewayGuardrailsOutputPii struct { - Behavior string `json:"behavior"` + Behavior string `json:"behavior,omitempty"` } type ResourceModelServingAiGatewayGuardrailsOutput struct { @@ -94,8 +94,8 @@ type ResourceModelServingConfigServedEntitiesExternalModelDatabricksModelServing type ResourceModelServingConfigServedEntitiesExternalModelGoogleCloudVertexAiConfig struct { PrivateKey string `json:"private_key,omitempty"` PrivateKeyPlaintext string `json:"private_key_plaintext,omitempty"` - ProjectId string `json:"project_id,omitempty"` - Region string `json:"region,omitempty"` + ProjectId string `json:"project_id"` + Region string `json:"region"` } type ResourceModelServingConfigServedEntitiesExternalModelOpenaiConfig struct { diff --git a/bundle/internal/tf/schema/resource_pipeline.go b/bundle/internal/tf/schema/resource_pipeline.go index ebdb85027..8e260e65c 100644 --- a/bundle/internal/tf/schema/resource_pipeline.go +++ b/bundle/internal/tf/schema/resource_pipeline.go @@ -249,6 +249,11 @@ type ResourcePipelineRestartWindow struct { TimeZoneId string `json:"time_zone_id,omitempty"` } +type ResourcePipelineRunAs struct { + ServicePrincipalName string `json:"service_principal_name,omitempty"` + UserName string `json:"user_name,omitempty"` +} + type ResourcePipelineTriggerCron struct { QuartzCronSchedule string `json:"quartz_cron_schedule,omitempty"` TimezoneId string `json:"timezone_id,omitempty"` @@ -296,5 +301,6 @@ type ResourcePipeline struct { Library []ResourcePipelineLibrary `json:"library,omitempty"` Notification []ResourcePipelineNotification `json:"notification,omitempty"` RestartWindow *ResourcePipelineRestartWindow `json:"restart_window,omitempty"` + RunAs *ResourcePipelineRunAs `json:"run_as,omitempty"` Trigger *ResourcePipelineTrigger `json:"trigger,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_recipient.go b/bundle/internal/tf/schema/resource_recipient.go index 91de4df76..4c8f2c7e7 100644 --- a/bundle/internal/tf/schema/resource_recipient.go +++ b/bundle/internal/tf/schema/resource_recipient.go @@ -29,6 +29,7 @@ type ResourceRecipient struct { CreatedAt int `json:"created_at,omitempty"` CreatedBy string `json:"created_by,omitempty"` DataRecipientGlobalMetastoreId string `json:"data_recipient_global_metastore_id,omitempty"` + ExpirationTime int `json:"expiration_time,omitempty"` Id string `json:"id,omitempty"` MetastoreId string `json:"metastore_id,omitempty"` Name string `json:"name"` diff --git a/bundle/internal/tf/schema/resources.go b/bundle/internal/tf/schema/resources.go index b57c2711a..c6eaa5b21 100644 --- a/bundle/internal/tf/schema/resources.go +++ b/bundle/internal/tf/schema/resources.go @@ -3,115 +3,119 @@ package schema type Resources struct { - AccessControlRuleSet map[string]any `json:"databricks_access_control_rule_set,omitempty"` - Alert map[string]any `json:"databricks_alert,omitempty"` - App map[string]any `json:"databricks_app,omitempty"` - 
ArtifactAllowlist map[string]any `json:"databricks_artifact_allowlist,omitempty"` - AutomaticClusterUpdateWorkspaceSetting map[string]any `json:"databricks_automatic_cluster_update_workspace_setting,omitempty"` - AwsS3Mount map[string]any `json:"databricks_aws_s3_mount,omitempty"` - AzureAdlsGen1Mount map[string]any `json:"databricks_azure_adls_gen1_mount,omitempty"` - AzureAdlsGen2Mount map[string]any `json:"databricks_azure_adls_gen2_mount,omitempty"` - AzureBlobMount map[string]any `json:"databricks_azure_blob_mount,omitempty"` - Budget map[string]any `json:"databricks_budget,omitempty"` - Catalog map[string]any `json:"databricks_catalog,omitempty"` - CatalogWorkspaceBinding map[string]any `json:"databricks_catalog_workspace_binding,omitempty"` - Cluster map[string]any `json:"databricks_cluster,omitempty"` - ClusterPolicy map[string]any `json:"databricks_cluster_policy,omitempty"` - ComplianceSecurityProfileWorkspaceSetting map[string]any `json:"databricks_compliance_security_profile_workspace_setting,omitempty"` - Connection map[string]any `json:"databricks_connection,omitempty"` - Credential map[string]any `json:"databricks_credential,omitempty"` - CustomAppIntegration map[string]any `json:"databricks_custom_app_integration,omitempty"` - Dashboard map[string]any `json:"databricks_dashboard,omitempty"` - DbfsFile map[string]any `json:"databricks_dbfs_file,omitempty"` - DefaultNamespaceSetting map[string]any `json:"databricks_default_namespace_setting,omitempty"` - Directory map[string]any `json:"databricks_directory,omitempty"` - EnhancedSecurityMonitoringWorkspaceSetting map[string]any `json:"databricks_enhanced_security_monitoring_workspace_setting,omitempty"` - Entitlements map[string]any `json:"databricks_entitlements,omitempty"` - ExternalLocation map[string]any `json:"databricks_external_location,omitempty"` - File map[string]any `json:"databricks_file,omitempty"` - GitCredential map[string]any `json:"databricks_git_credential,omitempty"` - GlobalInitScript map[string]any `json:"databricks_global_init_script,omitempty"` - Grant map[string]any `json:"databricks_grant,omitempty"` - Grants map[string]any `json:"databricks_grants,omitempty"` - Group map[string]any `json:"databricks_group,omitempty"` - GroupInstanceProfile map[string]any `json:"databricks_group_instance_profile,omitempty"` - GroupMember map[string]any `json:"databricks_group_member,omitempty"` - GroupRole map[string]any `json:"databricks_group_role,omitempty"` - InstancePool map[string]any `json:"databricks_instance_pool,omitempty"` - InstanceProfile map[string]any `json:"databricks_instance_profile,omitempty"` - IpAccessList map[string]any `json:"databricks_ip_access_list,omitempty"` - Job map[string]any `json:"databricks_job,omitempty"` - LakehouseMonitor map[string]any `json:"databricks_lakehouse_monitor,omitempty"` - Library map[string]any `json:"databricks_library,omitempty"` - Metastore map[string]any `json:"databricks_metastore,omitempty"` - MetastoreAssignment map[string]any `json:"databricks_metastore_assignment,omitempty"` - MetastoreDataAccess map[string]any `json:"databricks_metastore_data_access,omitempty"` - MlflowExperiment map[string]any `json:"databricks_mlflow_experiment,omitempty"` - MlflowModel map[string]any `json:"databricks_mlflow_model,omitempty"` - MlflowWebhook map[string]any `json:"databricks_mlflow_webhook,omitempty"` - ModelServing map[string]any `json:"databricks_model_serving,omitempty"` - Mount map[string]any `json:"databricks_mount,omitempty"` - MwsCredentials map[string]any 
`json:"databricks_mws_credentials,omitempty"` - MwsCustomerManagedKeys map[string]any `json:"databricks_mws_customer_managed_keys,omitempty"` - MwsLogDelivery map[string]any `json:"databricks_mws_log_delivery,omitempty"` - MwsNccBinding map[string]any `json:"databricks_mws_ncc_binding,omitempty"` - MwsNccPrivateEndpointRule map[string]any `json:"databricks_mws_ncc_private_endpoint_rule,omitempty"` - MwsNetworkConnectivityConfig map[string]any `json:"databricks_mws_network_connectivity_config,omitempty"` - MwsNetworks map[string]any `json:"databricks_mws_networks,omitempty"` - MwsPermissionAssignment map[string]any `json:"databricks_mws_permission_assignment,omitempty"` - MwsPrivateAccessSettings map[string]any `json:"databricks_mws_private_access_settings,omitempty"` - MwsStorageConfigurations map[string]any `json:"databricks_mws_storage_configurations,omitempty"` - MwsVpcEndpoint map[string]any `json:"databricks_mws_vpc_endpoint,omitempty"` - MwsWorkspaces map[string]any `json:"databricks_mws_workspaces,omitempty"` - Notebook map[string]any `json:"databricks_notebook,omitempty"` - NotificationDestination map[string]any `json:"databricks_notification_destination,omitempty"` - OboToken map[string]any `json:"databricks_obo_token,omitempty"` - OnlineTable map[string]any `json:"databricks_online_table,omitempty"` - PermissionAssignment map[string]any `json:"databricks_permission_assignment,omitempty"` - Permissions map[string]any `json:"databricks_permissions,omitempty"` - Pipeline map[string]any `json:"databricks_pipeline,omitempty"` - Provider map[string]any `json:"databricks_provider,omitempty"` - QualityMonitor map[string]any `json:"databricks_quality_monitor,omitempty"` - Query map[string]any `json:"databricks_query,omitempty"` - Recipient map[string]any `json:"databricks_recipient,omitempty"` - RegisteredModel map[string]any `json:"databricks_registered_model,omitempty"` - Repo map[string]any `json:"databricks_repo,omitempty"` - RestrictWorkspaceAdminsSetting map[string]any `json:"databricks_restrict_workspace_admins_setting,omitempty"` - Schema map[string]any `json:"databricks_schema,omitempty"` - Secret map[string]any `json:"databricks_secret,omitempty"` - SecretAcl map[string]any `json:"databricks_secret_acl,omitempty"` - SecretScope map[string]any `json:"databricks_secret_scope,omitempty"` - ServicePrincipal map[string]any `json:"databricks_service_principal,omitempty"` - ServicePrincipalRole map[string]any `json:"databricks_service_principal_role,omitempty"` - ServicePrincipalSecret map[string]any `json:"databricks_service_principal_secret,omitempty"` - Share map[string]any `json:"databricks_share,omitempty"` - SqlAlert map[string]any `json:"databricks_sql_alert,omitempty"` - SqlDashboard map[string]any `json:"databricks_sql_dashboard,omitempty"` - SqlEndpoint map[string]any `json:"databricks_sql_endpoint,omitempty"` - SqlGlobalConfig map[string]any `json:"databricks_sql_global_config,omitempty"` - SqlPermissions map[string]any `json:"databricks_sql_permissions,omitempty"` - SqlQuery map[string]any `json:"databricks_sql_query,omitempty"` - SqlTable map[string]any `json:"databricks_sql_table,omitempty"` - SqlVisualization map[string]any `json:"databricks_sql_visualization,omitempty"` - SqlWidget map[string]any `json:"databricks_sql_widget,omitempty"` - StorageCredential map[string]any `json:"databricks_storage_credential,omitempty"` - SystemSchema map[string]any `json:"databricks_system_schema,omitempty"` - Table map[string]any `json:"databricks_table,omitempty"` - Token 
map[string]any `json:"databricks_token,omitempty"` - User map[string]any `json:"databricks_user,omitempty"` - UserInstanceProfile map[string]any `json:"databricks_user_instance_profile,omitempty"` - UserRole map[string]any `json:"databricks_user_role,omitempty"` - VectorSearchEndpoint map[string]any `json:"databricks_vector_search_endpoint,omitempty"` - VectorSearchIndex map[string]any `json:"databricks_vector_search_index,omitempty"` - Volume map[string]any `json:"databricks_volume,omitempty"` - WorkspaceBinding map[string]any `json:"databricks_workspace_binding,omitempty"` - WorkspaceConf map[string]any `json:"databricks_workspace_conf,omitempty"` - WorkspaceFile map[string]any `json:"databricks_workspace_file,omitempty"` + AccessControlRuleSet map[string]any `json:"databricks_access_control_rule_set,omitempty"` + AibiDashboardEmbeddingAccessPolicySetting map[string]any `json:"databricks_aibi_dashboard_embedding_access_policy_setting,omitempty"` + AibiDashboardEmbeddingApprovedDomainsSetting map[string]any `json:"databricks_aibi_dashboard_embedding_approved_domains_setting,omitempty"` + Alert map[string]any `json:"databricks_alert,omitempty"` + App map[string]any `json:"databricks_app,omitempty"` + ArtifactAllowlist map[string]any `json:"databricks_artifact_allowlist,omitempty"` + AutomaticClusterUpdateWorkspaceSetting map[string]any `json:"databricks_automatic_cluster_update_workspace_setting,omitempty"` + AwsS3Mount map[string]any `json:"databricks_aws_s3_mount,omitempty"` + AzureAdlsGen1Mount map[string]any `json:"databricks_azure_adls_gen1_mount,omitempty"` + AzureAdlsGen2Mount map[string]any `json:"databricks_azure_adls_gen2_mount,omitempty"` + AzureBlobMount map[string]any `json:"databricks_azure_blob_mount,omitempty"` + Budget map[string]any `json:"databricks_budget,omitempty"` + Catalog map[string]any `json:"databricks_catalog,omitempty"` + CatalogWorkspaceBinding map[string]any `json:"databricks_catalog_workspace_binding,omitempty"` + Cluster map[string]any `json:"databricks_cluster,omitempty"` + ClusterPolicy map[string]any `json:"databricks_cluster_policy,omitempty"` + ComplianceSecurityProfileWorkspaceSetting map[string]any `json:"databricks_compliance_security_profile_workspace_setting,omitempty"` + Connection map[string]any `json:"databricks_connection,omitempty"` + Credential map[string]any `json:"databricks_credential,omitempty"` + CustomAppIntegration map[string]any `json:"databricks_custom_app_integration,omitempty"` + Dashboard map[string]any `json:"databricks_dashboard,omitempty"` + DbfsFile map[string]any `json:"databricks_dbfs_file,omitempty"` + DefaultNamespaceSetting map[string]any `json:"databricks_default_namespace_setting,omitempty"` + Directory map[string]any `json:"databricks_directory,omitempty"` + EnhancedSecurityMonitoringWorkspaceSetting map[string]any `json:"databricks_enhanced_security_monitoring_workspace_setting,omitempty"` + Entitlements map[string]any `json:"databricks_entitlements,omitempty"` + ExternalLocation map[string]any `json:"databricks_external_location,omitempty"` + File map[string]any `json:"databricks_file,omitempty"` + GitCredential map[string]any `json:"databricks_git_credential,omitempty"` + GlobalInitScript map[string]any `json:"databricks_global_init_script,omitempty"` + Grant map[string]any `json:"databricks_grant,omitempty"` + Grants map[string]any `json:"databricks_grants,omitempty"` + Group map[string]any `json:"databricks_group,omitempty"` + GroupInstanceProfile map[string]any 
`json:"databricks_group_instance_profile,omitempty"` + GroupMember map[string]any `json:"databricks_group_member,omitempty"` + GroupRole map[string]any `json:"databricks_group_role,omitempty"` + InstancePool map[string]any `json:"databricks_instance_pool,omitempty"` + InstanceProfile map[string]any `json:"databricks_instance_profile,omitempty"` + IpAccessList map[string]any `json:"databricks_ip_access_list,omitempty"` + Job map[string]any `json:"databricks_job,omitempty"` + LakehouseMonitor map[string]any `json:"databricks_lakehouse_monitor,omitempty"` + Library map[string]any `json:"databricks_library,omitempty"` + Metastore map[string]any `json:"databricks_metastore,omitempty"` + MetastoreAssignment map[string]any `json:"databricks_metastore_assignment,omitempty"` + MetastoreDataAccess map[string]any `json:"databricks_metastore_data_access,omitempty"` + MlflowExperiment map[string]any `json:"databricks_mlflow_experiment,omitempty"` + MlflowModel map[string]any `json:"databricks_mlflow_model,omitempty"` + MlflowWebhook map[string]any `json:"databricks_mlflow_webhook,omitempty"` + ModelServing map[string]any `json:"databricks_model_serving,omitempty"` + Mount map[string]any `json:"databricks_mount,omitempty"` + MwsCredentials map[string]any `json:"databricks_mws_credentials,omitempty"` + MwsCustomerManagedKeys map[string]any `json:"databricks_mws_customer_managed_keys,omitempty"` + MwsLogDelivery map[string]any `json:"databricks_mws_log_delivery,omitempty"` + MwsNccBinding map[string]any `json:"databricks_mws_ncc_binding,omitempty"` + MwsNccPrivateEndpointRule map[string]any `json:"databricks_mws_ncc_private_endpoint_rule,omitempty"` + MwsNetworkConnectivityConfig map[string]any `json:"databricks_mws_network_connectivity_config,omitempty"` + MwsNetworks map[string]any `json:"databricks_mws_networks,omitempty"` + MwsPermissionAssignment map[string]any `json:"databricks_mws_permission_assignment,omitempty"` + MwsPrivateAccessSettings map[string]any `json:"databricks_mws_private_access_settings,omitempty"` + MwsStorageConfigurations map[string]any `json:"databricks_mws_storage_configurations,omitempty"` + MwsVpcEndpoint map[string]any `json:"databricks_mws_vpc_endpoint,omitempty"` + MwsWorkspaces map[string]any `json:"databricks_mws_workspaces,omitempty"` + Notebook map[string]any `json:"databricks_notebook,omitempty"` + NotificationDestination map[string]any `json:"databricks_notification_destination,omitempty"` + OboToken map[string]any `json:"databricks_obo_token,omitempty"` + OnlineTable map[string]any `json:"databricks_online_table,omitempty"` + PermissionAssignment map[string]any `json:"databricks_permission_assignment,omitempty"` + Permissions map[string]any `json:"databricks_permissions,omitempty"` + Pipeline map[string]any `json:"databricks_pipeline,omitempty"` + Provider map[string]any `json:"databricks_provider,omitempty"` + QualityMonitor map[string]any `json:"databricks_quality_monitor,omitempty"` + Query map[string]any `json:"databricks_query,omitempty"` + Recipient map[string]any `json:"databricks_recipient,omitempty"` + RegisteredModel map[string]any `json:"databricks_registered_model,omitempty"` + Repo map[string]any `json:"databricks_repo,omitempty"` + RestrictWorkspaceAdminsSetting map[string]any `json:"databricks_restrict_workspace_admins_setting,omitempty"` + Schema map[string]any `json:"databricks_schema,omitempty"` + Secret map[string]any `json:"databricks_secret,omitempty"` + SecretAcl map[string]any `json:"databricks_secret_acl,omitempty"` + SecretScope map[string]any 
`json:"databricks_secret_scope,omitempty"` + ServicePrincipal map[string]any `json:"databricks_service_principal,omitempty"` + ServicePrincipalRole map[string]any `json:"databricks_service_principal_role,omitempty"` + ServicePrincipalSecret map[string]any `json:"databricks_service_principal_secret,omitempty"` + Share map[string]any `json:"databricks_share,omitempty"` + SqlAlert map[string]any `json:"databricks_sql_alert,omitempty"` + SqlDashboard map[string]any `json:"databricks_sql_dashboard,omitempty"` + SqlEndpoint map[string]any `json:"databricks_sql_endpoint,omitempty"` + SqlGlobalConfig map[string]any `json:"databricks_sql_global_config,omitempty"` + SqlPermissions map[string]any `json:"databricks_sql_permissions,omitempty"` + SqlQuery map[string]any `json:"databricks_sql_query,omitempty"` + SqlTable map[string]any `json:"databricks_sql_table,omitempty"` + SqlVisualization map[string]any `json:"databricks_sql_visualization,omitempty"` + SqlWidget map[string]any `json:"databricks_sql_widget,omitempty"` + StorageCredential map[string]any `json:"databricks_storage_credential,omitempty"` + SystemSchema map[string]any `json:"databricks_system_schema,omitempty"` + Table map[string]any `json:"databricks_table,omitempty"` + Token map[string]any `json:"databricks_token,omitempty"` + User map[string]any `json:"databricks_user,omitempty"` + UserInstanceProfile map[string]any `json:"databricks_user_instance_profile,omitempty"` + UserRole map[string]any `json:"databricks_user_role,omitempty"` + VectorSearchEndpoint map[string]any `json:"databricks_vector_search_endpoint,omitempty"` + VectorSearchIndex map[string]any `json:"databricks_vector_search_index,omitempty"` + Volume map[string]any `json:"databricks_volume,omitempty"` + WorkspaceBinding map[string]any `json:"databricks_workspace_binding,omitempty"` + WorkspaceConf map[string]any `json:"databricks_workspace_conf,omitempty"` + WorkspaceFile map[string]any `json:"databricks_workspace_file,omitempty"` } func NewResources() *Resources { return &Resources{ - AccessControlRuleSet: make(map[string]any), + AccessControlRuleSet: make(map[string]any), + AibiDashboardEmbeddingAccessPolicySetting: make(map[string]any), + AibiDashboardEmbeddingApprovedDomainsSetting: make(map[string]any), Alert: make(map[string]any), App: make(map[string]any), ArtifactAllowlist: make(map[string]any), diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 1f89dc64d..816e8e6aa 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.65.1" func NewRoot() *Root { return &Root{ diff --git a/bundle/libraries/upload_test.go b/bundle/libraries/upload_test.go index 44b194c56..3ce67eeb6 100644 --- a/bundle/libraries/upload_test.go +++ b/bundle/libraries/upload_test.go @@ -93,7 +93,7 @@ func TestArtifactUploadForWorkspace(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ExpandGlobReferences(), UploadWithClient(mockFiler))) + diags := bundle.ApplySeq(context.Background(), b, ExpandGlobReferences(), UploadWithClient(mockFiler)) require.NoError(t, diags.Error()) // Test that libraries path is updated @@ -181,7 +181,7 @@ func TestArtifactUploadForVolumes(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - diags := bundle.Apply(context.Background(), b, 
bundle.Seq(ExpandGlobReferences(), UploadWithClient(mockFiler))) + diags := bundle.ApplySeq(context.Background(), b, ExpandGlobReferences(), UploadWithClient(mockFiler)) require.NoError(t, diags.Error()) // Test that libraries path is updated @@ -225,7 +225,7 @@ func TestArtifactUploadWithNoLibraryReference(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ExpandGlobReferences(), UploadWithClient(mockFiler))) + diags := bundle.ApplySeq(context.Background(), b, ExpandGlobReferences(), UploadWithClient(mockFiler)) require.NoError(t, diags.Error()) require.Equal(t, "/Workspace/foo/bar/artifacts/.internal/source.whl", b.Config.Artifacts["whl"].Files[0].RemotePath) @@ -311,7 +311,7 @@ func TestUploadMultipleLibraries(t *testing.T) { filer.CreateParentDirectories, ).Return(nil).Once() - diags := bundle.Apply(context.Background(), b, bundle.Seq(ExpandGlobReferences(), UploadWithClient(mockFiler))) + diags := bundle.ApplySeq(context.Background(), b, ExpandGlobReferences(), UploadWithClient(mockFiler)) require.NoError(t, diags.Error()) // Test that libraries path is updated diff --git a/bundle/log_string.go b/bundle/log_string.go deleted file mode 100644 index f14e3a3ad..000000000 --- a/bundle/log_string.go +++ /dev/null @@ -1,28 +0,0 @@ -package bundle - -import ( - "context" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/diag" -) - -type LogStringMutator struct { - message string -} - -func (d *LogStringMutator) Name() string { - return "log_string" -} - -func LogString(message string) Mutator { - return &LogStringMutator{ - message: message, - } -} - -func (m *LogStringMutator) Apply(ctx context.Context, b *Bundle) diag.Diagnostics { - cmdio.LogString(ctx, m.message) - - return nil -} diff --git a/bundle/mutator.go b/bundle/mutator.go index 6c9968aac..af3940414 100644 --- a/bundle/mutator.go +++ b/bundle/mutator.go @@ -42,12 +42,23 @@ func Apply(ctx context.Context, b *Bundle, m Mutator) diag.Diagnostics { // such that they are not logged multiple times. // If this is done, we can omit this block. if err := diags.Error(); err != nil { - log.Errorf(ctx, "Error: %s", err) + log.Debugf(ctx, "Error: %s", err) } return diags } +func ApplySeq(ctx context.Context, b *Bundle, mutators ...Mutator) diag.Diagnostics { + diags := diag.Diagnostics{} + for _, m := range mutators { + diags = diags.Extend(Apply(ctx, b, m)) + if diags.HasError() { + return diags + } + } + return diags +} + type funcMutator struct { fn func(context.Context, *Bundle) diag.Diagnostics } diff --git a/bundle/mutator_read_only.go b/bundle/mutator_read_only.go index ee4e36e0f..700a90d8d 100644 --- a/bundle/mutator_read_only.go +++ b/bundle/mutator_read_only.go @@ -22,7 +22,7 @@ func ApplyReadOnly(ctx context.Context, rb ReadOnlyBundle, m ReadOnlyMutator) di log.Debugf(ctx, "ApplyReadOnly") diags := m.Apply(ctx, rb) if err := diags.Error(); err != nil { - log.Errorf(ctx, "Error: %s", err) + log.Debugf(ctx, "Error: %s", err) } return diags diff --git a/bundle/mutator_test.go b/bundle/mutator_test.go index 04ff19cff..d6f21adc9 100644 --- a/bundle/mutator_test.go +++ b/bundle/mutator_test.go @@ -19,7 +19,7 @@ func (t *testMutator) Name() string { func (t *testMutator) Apply(ctx context.Context, b *Bundle) diag.Diagnostics { t.applyCalled++ - return Apply(ctx, b, Seq(t.nestedMutators...)) + return ApplySeq(ctx, b, t.nestedMutators...) 
} func TestMutator(t *testing.T) { diff --git a/bundle/permissions/permission_diagnostics.go b/bundle/permissions/permission_diagnostics.go index d2c24fa01..3c76f3505 100644 --- a/bundle/permissions/permission_diagnostics.go +++ b/bundle/permissions/permission_diagnostics.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/iamutil" "github.com/databricks/cli/libs/set" ) @@ -33,9 +34,25 @@ func (m *permissionDiagnostics) Apply(ctx context.Context, b *bundle.Bundle) dia return nil } + me := b.Config.Workspace.CurrentUser.User + identityType := "user_name" + if iamutil.IsServicePrincipal(me) { + identityType = "service_principal_name" + } + return diag.Diagnostics{{ - Severity: diag.Warning, - Summary: fmt.Sprintf("permissions section should include %s or one of their groups with CAN_MANAGE permissions", b.Config.Workspace.CurrentUser.UserName), + Severity: diag.Recommendation, + Summary: fmt.Sprintf("permissions section should explicitly include the current deployment identity '%s' or one of its groups\n"+ + "If it is not included, CAN_MANAGE permissions are only applied if the present identity is used to deploy.\n\n"+ + "Consider adding a top-level permissions section such as the following:\n\n"+ + " permissions:\n"+ + " - %s: %s\n"+ + " level: CAN_MANAGE\n\n"+ + "See https://docs.databricks.com/dev-tools/bundles/permissions.html to learn more about permission configuration.", + b.Config.Workspace.CurrentUser.UserName, + identityType, + b.Config.Workspace.CurrentUser.UserName, + ), Locations: []dyn.Location{b.Config.GetLocation("permissions")}, ID: diag.PermissionNotIncluded, }} @@ -46,7 +63,7 @@ func (m *permissionDiagnostics) Apply(ctx context.Context, b *bundle.Bundle) dia // target workspace folder. // // Returns: -// - isManager: true if the current user is can manage the bundle resources. +// - canManageBundle: true if the current user or one of their groups can manage the bundle resources.
// - assistance: advice on who to contact as to manage this project func analyzeBundlePermissions(b *bundle.Bundle) (bool, string) { canManageBundle := false diff --git a/bundle/permissions/permission_diagnostics_test.go b/bundle/permissions/permission_diagnostics_test.go index 6c55ab594..892f122de 100644 --- a/bundle/permissions/permission_diagnostics_test.go +++ b/bundle/permissions/permission_diagnostics_test.go @@ -18,7 +18,14 @@ func TestPermissionDiagnosticsApplySuccess(t *testing.T) { {Level: "CAN_MANAGE", UserName: "testuser@databricks.com"}, }) - diags := permissions.PermissionDiagnostics().Apply(context.Background(), b) + diags := bundle.Apply(context.Background(), b, permissions.PermissionDiagnostics()) + require.NoError(t, diags.Error()) +} + +func TestPermissionDiagnosticsEmpty(t *testing.T) { + b := mockBundle(nil) + + diags := bundle.Apply(context.Background(), b, permissions.PermissionDiagnostics()) require.NoError(t, diags.Error()) } @@ -27,9 +34,19 @@ func TestPermissionDiagnosticsApplyFail(t *testing.T) { {Level: "CAN_VIEW", UserName: "testuser@databricks.com"}, }) - diags := permissions.PermissionDiagnostics().Apply(context.Background(), b) - require.Equal(t, diag.Warning, diags[0].Severity) - require.Contains(t, diags[0].Summary, "permissions section should include testuser@databricks.com or one of their groups with CAN_MANAGE permissions") + diags := bundle.Apply(context.Background(), b, permissions.PermissionDiagnostics()) + require.Equal(t, diag.Recommendation, diags[0].Severity) + + expectedMsg := "permissions section should explicitly include the current deployment identity " + + "'testuser@databricks.com' or one of its groups\n" + + "If it is not included, CAN_MANAGE permissions are only applied if the present identity is used to deploy.\n\n" + + "Consider adding a top-level permissions section such as the following:\n\n" + + " permissions:\n" + + " - user_name: testuser@databricks.com\n" + + " level: CAN_MANAGE\n\n" + + "See https://docs.databricks.com/dev-tools/bundles/permissions.html to learn more about permission configuration."
+ + require.Contains(t, diags[0].Summary, expectedMsg) } func mockBundle(permissions []resources.Permission) *bundle.Bundle { diff --git a/bundle/permissions/validate_test.go b/bundle/permissions/validate_test.go index ff132b4e1..aa0b486d3 100644 --- a/bundle/permissions/validate_test.go +++ b/bundle/permissions/validate_test.go @@ -34,7 +34,7 @@ func TestValidateSharedRootPermissionsForShared(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions())) + diags := bundle.Apply(context.Background(), b, ValidateSharedRootPermissions()) require.Empty(t, diags) } @@ -59,7 +59,7 @@ func TestValidateSharedRootPermissionsForSharedError(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions())) + diags := bundle.Apply(context.Background(), b, ValidateSharedRootPermissions()) require.Len(t, diags, 1) require.Equal(t, "the bundle root path /Workspace/Shared/foo/bar is writable by all workspace users", diags[0].Summary) require.Equal(t, diag.Warning, diags[0].Severity) diff --git a/bundle/permissions/workspace_root_test.go b/bundle/permissions/workspace_root_test.go index c48704a63..f9c5da7d6 100644 --- a/bundle/permissions/workspace_root_test.go +++ b/bundle/permissions/workspace_root_test.go @@ -38,8 +38,8 @@ func TestApplyWorkspaceRootPermissions(t *testing.T) { "job_2": {JobSettings: &jobs.JobSettings{Name: "job_2"}}, }, Pipelines: map[string]*resources.Pipeline{ - "pipeline_1": {PipelineSpec: &pipelines.PipelineSpec{}}, - "pipeline_2": {PipelineSpec: &pipelines.PipelineSpec{}}, + "pipeline_1": {CreatePipeline: &pipelines.CreatePipeline{}}, + "pipeline_2": {CreatePipeline: &pipelines.CreatePipeline{}}, }, Models: map[string]*resources.MlflowModel{ "model_1": {Model: &ml.Model{}}, @@ -73,7 +73,7 @@ func TestApplyWorkspaceRootPermissions(t *testing.T) { WorkspaceObjectType: "directories", }).Return(nil, nil) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ValidateSharedRootPermissions(), ApplyWorkspaceRootPermissions())) + diags := bundle.ApplySeq(context.Background(), b, ValidateSharedRootPermissions(), ApplyWorkspaceRootPermissions()) require.Empty(t, diags) } @@ -98,8 +98,8 @@ func TestApplyWorkspaceRootPermissionsForAllPaths(t *testing.T) { "job_2": {JobSettings: &jobs.JobSettings{Name: "job_2"}}, }, Pipelines: map[string]*resources.Pipeline{ - "pipeline_1": {PipelineSpec: &pipelines.PipelineSpec{}}, - "pipeline_2": {PipelineSpec: &pipelines.PipelineSpec{}}, + "pipeline_1": {CreatePipeline: &pipelines.CreatePipeline{}}, + "pipeline_2": {CreatePipeline: &pipelines.CreatePipeline{}}, }, Models: map[string]*resources.MlflowModel{ "model_1": {Model: &ml.Model{}}, diff --git a/bundle/phases/bind.go b/bundle/phases/bind.go index c62c48aea..ae54e8657 100644 --- a/bundle/phases/bind.go +++ b/bundle/phases/bind.go @@ -1,45 +1,57 @@ package phases import ( + "context" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" ) -func Bind(opts *terraform.BindOptions) bundle.Mutator { - return newPhase( - "bind", - []bundle.Mutator{ - lock.Acquire(), - bundle.Defer( - bundle.Seq( - terraform.StatePull(), - terraform.Interpolate(), - terraform.Write(), - terraform.Import(opts), - 
terraform.StatePush(), - ), - lock.Release(lock.GoalBind), - ), - }, - ) +func Bind(ctx context.Context, b *bundle.Bundle, opts *terraform.BindOptions) (diags diag.Diagnostics) { + log.Info(ctx, "Phase: bind") + + diags = bundle.Apply(ctx, b, lock.Acquire()) + if diags.HasError() { + return diags + } + + defer func() { + diags = diags.Extend(bundle.Apply(ctx, b, lock.Release(lock.GoalBind))) + }() + + diags = diags.Extend(bundle.ApplySeq(ctx, b, + terraform.StatePull(), + terraform.Interpolate(), + terraform.Write(), + terraform.Import(opts), + terraform.StatePush(), + )) + + return diags } -func Unbind(resourceType, resourceKey string) bundle.Mutator { - return newPhase( - "unbind", - []bundle.Mutator{ - lock.Acquire(), - bundle.Defer( - bundle.Seq( - terraform.StatePull(), - terraform.Interpolate(), - terraform.Write(), - terraform.Unbind(resourceType, resourceKey), - terraform.StatePush(), - ), - lock.Release(lock.GoalUnbind), - ), - }, - ) +func Unbind(ctx context.Context, b *bundle.Bundle, resourceType, resourceKey string) (diags diag.Diagnostics) { + log.Info(ctx, "Phase: unbind") + + diags = bundle.Apply(ctx, b, lock.Acquire()) + if diags.HasError() { + return diags + } + + defer func() { + diags = diags.Extend(bundle.Apply(ctx, b, lock.Release(lock.GoalUnbind))) + }() + + diags = diags.Extend(bundle.ApplySeq(ctx, b, + terraform.StatePull(), + terraform.Interpolate(), + terraform.Write(), + terraform.Unbind(resourceType, resourceKey), + terraform.StatePush(), + )) + + return diags } diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 3ddc6b181..0170ed51c 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -1,27 +1,31 @@ package phases import ( + "context" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" + "github.com/databricks/cli/bundle/artifacts/whl" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" ) // The build phase builds artifacts. -func Build() bundle.Mutator { - return newPhase( - "build", - []bundle.Mutator{ - scripts.Execute(config.ScriptPreBuild), - artifacts.DetectPackages(), - artifacts.InferMissingProperties(), - artifacts.PrepareAll(), - artifacts.BuildAll(), - scripts.Execute(config.ScriptPostBuild), - mutator.ResolveVariableReferences( - "artifacts", - ), - }, +func Build(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + log.Info(ctx, "Phase: build") + + return bundle.ApplySeq(ctx, b, + scripts.Execute(config.ScriptPreBuild), + whl.DetectPackage(), + artifacts.InferMissingProperties(), + artifacts.PrepareAll(), + artifacts.BuildAll(), + scripts.Execute(config.ScriptPostBuild), + mutator.ResolveVariableReferences( + "artifacts", + ), ) } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index c6ec04962..b31ed682f 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -19,6 +19,8 @@ import ( "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/sync" terraformlib "github.com/databricks/cli/libs/terraform" tfjson "github.com/hashicorp/terraform-json" @@ -124,60 +126,89 @@ is removed from the catalog, but the underlying files are not deleted:` return approved, nil } -// The deploy phase deploys artifacts and resources. 
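The phase refactor visible in these hunks changes each phase from a bundle.Mutator constructor into a plain function that takes a context and a *bundle.Bundle and returns diag.Diagnostics. A hedged sketch of how a caller might chain the new phase functions (this caller is illustrative and not part of the diff; Deploy's output handler is passed as nil here):

// Illustrative only: chaining the refactored phase functions.
diags := phases.Initialize(ctx, b)
if !diags.HasError() {
	diags = diags.Extend(phases.Build(ctx, b))
}
if !diags.HasError() {
	diags = diags.Extend(phases.Deploy(ctx, b, nil))
}
if err := diags.Error(); err != nil {
	return err
}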
-func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { +func deployCore(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // Core mutators that CRUD resources and modify deployment state. These // mutators need informed consent if they are potentially destructive. - deployCore := bundle.Defer( - bundle.Seq( - bundle.LogString("Deploying resources..."), - terraform.Apply(), - ), - bundle.Seq( - terraform.StatePush(), - terraform.Load(), - apps.InterpolateVariables(), - apps.UploadConfig(), - metadata.Compute(), - metadata.Upload(), - bundle.LogString("Deployment complete!"), - ), - ) + cmdio.LogString(ctx, "Deploying resources...") + diags := bundle.Apply(ctx, b, terraform.Apply()) - deployMutator := bundle.Seq( + // following original logic, continuing with sequence below even if terraform had errors + + diags = diags.Extend(bundle.ApplySeq(ctx, b, + terraform.StatePush(), + terraform.Load(), + apps.InterpolateVariables(), + apps.UploadConfig(), + metadata.Compute(), + metadata.Upload(), + )) + + if !diags.HasError() { + cmdio.LogString(ctx, "Deployment complete!") + } + + return diags +} + +// The deploy phase deploys artifacts and resources. +func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHandler) (diags diag.Diagnostics) { + log.Info(ctx, "Phase: deploy") + + // Core mutators that CRUD resources and modify deployment state. These + // mutators need informed consent if they are potentially destructive. + diags = bundle.ApplySeq(ctx, b, scripts.Execute(config.ScriptPreDeploy), lock.Acquire(), - bundle.Defer( - bundle.Seq( - terraform.StatePull(), - terraform.CheckDashboardsModifiedRemotely(), - deploy.StatePull(), - mutator.ValidateGitDetails(), - artifacts.CleanUp(), - libraries.ExpandGlobReferences(), - libraries.Upload(), - trampoline.TransformWheelTask(), - files.Upload(outputHandler), - deploy.StateUpdate(), - deploy.StatePush(), - permissions.ApplyWorkspaceRootPermissions(), - terraform.Interpolate(), - terraform.Write(), - terraform.CheckRunningResource(), - terraform.Plan(terraform.PlanGoal("deploy")), - bundle.If( - approvalForDeploy, - deployCore, - bundle.LogString("Deployment cancelled!"), - ), - ), - lock.Release(lock.GoalDeploy), - ), - scripts.Execute(config.ScriptPostDeploy), ) - return newPhase( - "deploy", - []bundle.Mutator{deployMutator}, + if diags.HasError() { + // lock is not acquired here + return diags + } + + // lock is acquired here + defer func() { + diags = diags.Extend(bundle.Apply(ctx, b, lock.Release(lock.GoalDeploy))) + }() + + diags = bundle.ApplySeq(ctx, b, + terraform.StatePull(), + terraform.CheckDashboardsModifiedRemotely(), + deploy.StatePull(), + mutator.ValidateGitDetails(), + artifacts.CleanUp(), + libraries.ExpandGlobReferences(), + libraries.Upload(), + trampoline.TransformWheelTask(), + files.Upload(outputHandler), + deploy.StateUpdate(), + deploy.StatePush(), + permissions.ApplyWorkspaceRootPermissions(), + terraform.Interpolate(), + terraform.Write(), + terraform.CheckRunningResource(), + terraform.Plan(terraform.PlanGoal("deploy")), ) + + if diags.HasError() { + return diags + } + + haveApproval, err := approvalForDeploy(ctx, b) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + return diags + } + + if haveApproval { + diags = diags.Extend(deployCore(ctx, b)) + } else { + cmdio.LogString(ctx, "Deployment cancelled!") + } + + if diags.HasError() { + return diags + } + + return diags.Extend(bundle.Apply(ctx, b, scripts.Execute(config.ScriptPostDeploy))) } diff --git 
a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 05a41dea2..daff07965 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" terraformlib "github.com/databricks/cli/libs/terraform" @@ -77,42 +78,65 @@ func approvalForDestroy(ctx context.Context, b *bundle.Bundle) (bool, error) { return approved, nil } -// The destroy phase deletes artifacts and resources. -func Destroy() bundle.Mutator { +func destroyCore(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { // Core destructive mutators for destroy. These require informed user consent. - destroyCore := bundle.Seq( + diags := bundle.ApplySeq(ctx, b, terraform.Apply(), files.Delete(), - bundle.LogString("Destroy complete!"), ) - destroyMutator := bundle.Seq( - lock.Acquire(), - bundle.Defer( - bundle.Seq( - terraform.StatePull(), - terraform.Interpolate(), - terraform.Write(), - terraform.Plan(terraform.PlanGoal("destroy")), - bundle.If( - approvalForDestroy, - destroyCore, - bundle.LogString("Destroy cancelled!"), - ), - ), - lock.Release(lock.GoalDestroy), - ), - ) + if !diags.HasError() { + cmdio.LogString(ctx, "Destroy complete!") + } - return newPhase( - "destroy", - []bundle.Mutator{ - // Only run deploy mutator if root path exists. - bundle.If( - assertRootPathExists, - destroyMutator, - bundle.LogString("No active deployment found to destroy!"), - ), - }, - ) + return diags +} + +// The destroy phase deletes artifacts and resources. +func Destroy(ctx context.Context, b *bundle.Bundle) (diags diag.Diagnostics) { + log.Info(ctx, "Phase: destroy") + + ok, err := assertRootPathExists(ctx, b) + if err != nil { + return diag.FromErr(err) + } + + if !ok { + cmdio.LogString(ctx, "No active deployment found to destroy!") + return diags + } + + diags = diags.Extend(bundle.Apply(ctx, b, lock.Acquire())) + if diags.HasError() { + return diags + } + + defer func() { + diags = diags.Extend(bundle.Apply(ctx, b, lock.Release(lock.GoalDestroy))) + }() + + diags = diags.Extend(bundle.ApplySeq(ctx, b, + terraform.StatePull(), + terraform.Interpolate(), + terraform.Write(), + terraform.Plan(terraform.PlanGoal("destroy")), + )) + + if diags.HasError() { + return diags + } + + hasApproval, err := approvalForDestroy(ctx, b) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + return diags + } + + if hasApproval { + diags = diags.Extend(destroyCore(ctx, b)) + } else { + cmdio.LogString(ctx, "Destroy cancelled!") + } + + return diags } diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 50df5634a..fef38bd28 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -1,6 +1,8 @@ package phases import ( + "context" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/apps" "github.com/databricks/cli/bundle/config" @@ -12,95 +14,96 @@ import ( "github.com/databricks/cli/bundle/permissions" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/bundle/trampoline" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" ) // The initialize phase fills in defaults and connects to the workspace. // Interpolation of fields referring to the "bundle" and "workspace" keys // happens upon completion of this phase. 
-func Initialize() bundle.Mutator { - return newPhase( - "initialize", - []bundle.Mutator{ - validate.AllResourcesHaveValues(), +func Initialize(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + log.Info(ctx, "Phase: initialize") - // Update all path fields in the sync block to be relative to the bundle root path. - mutator.RewriteSyncPaths(), + return bundle.ApplySeq(ctx, b, + validate.AllResourcesHaveValues(), - // Configure the default sync path to equal the bundle root if not explicitly configured. - // By default, this means all files in the bundle root directory are synchronized. - mutator.SyncDefaultPath(), + // Update all path fields in the sync block to be relative to the bundle root path. + mutator.RewriteSyncPaths(), - // Figure out if the sync root path is identical or an ancestor of the bundle root path. - // If it is an ancestor, this updates all paths to be relative to the sync root path. - mutator.SyncInferRoot(), + // Configure the default sync path to equal the bundle root if not explicitly configured. + // By default, this means all files in the bundle root directory are synchronized. + mutator.SyncDefaultPath(), - mutator.InitializeWorkspaceClient(), - mutator.PopulateCurrentUser(), - mutator.LoadGitDetails(), + // Figure out if the sync root path is identical or an ancestor of the bundle root path. + // If it is an ancestor, this updates all paths to be relative to the sync root path. + mutator.SyncInferRoot(), - // This mutator needs to be run before variable interpolation and defining default workspace paths - // because it affects how workspace variables are resolved. - mutator.ApplySourceLinkedDeploymentPreset(), + mutator.PopulateCurrentUser(), + mutator.LoadGitDetails(), - mutator.DefineDefaultWorkspaceRoot(), - mutator.ExpandWorkspaceRoot(), - mutator.DefineDefaultWorkspacePaths(), - mutator.PrependWorkspacePrefix(), + // This mutator needs to be run before variable interpolation and defining default workspace paths + // because it affects how workspace variables are resolved. + mutator.ApplySourceLinkedDeploymentPreset(), - // This mutator needs to be run before variable interpolation because it - // searches for strings with variable references in them. - mutator.RewriteWorkspacePrefix(), + mutator.DefineDefaultWorkspaceRoot(), + mutator.ExpandWorkspaceRoot(), + mutator.DefineDefaultWorkspacePaths(), + mutator.PrependWorkspacePrefix(), - mutator.SetVariables(), + // This mutator needs to be run before variable interpolation because it + // searches for strings with variable references in them. + mutator.RewriteWorkspacePrefix(), - // Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences, - // ResolveVariableReferencesInComplexVariables and ResolveVariableReferences. 
- // See what is expected in PythonMutatorPhaseInit doc - pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit), - pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoadResources), - pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseApplyMutators), - mutator.ResolveVariableReferencesInLookup(), - mutator.ResolveResourceReferences(), - mutator.ResolveVariableReferencesInComplexVariables(), - mutator.ResolveVariableReferences( - "bundle", - "workspace", - "variables", - ), + mutator.SetVariables(), - mutator.MergeJobClusters(), - mutator.MergeJobParameters(), - mutator.MergeJobTasks(), - mutator.MergePipelineClusters(), - mutator.MergeApps(), + // Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences, + // ResolveVariableReferencesInComplexVariables and ResolveVariableReferences. + // See what is expected in PythonMutatorPhaseInit doc + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit), + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoadResources), + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseApplyMutators), + mutator.ResolveVariableReferencesInLookup(), + mutator.ResolveResourceReferences(), + mutator.ResolveVariableReferences( + "bundle", + "workspace", + "variables", + ), - // Provide permission config errors & warnings after initializing all variables - permissions.PermissionDiagnostics(), - mutator.SetRunAs(), - mutator.OverrideCompute(), - mutator.ConfigureDashboardDefaults(), - mutator.ConfigureVolumeDefaults(), - mutator.ProcessTargetMode(), - mutator.ApplyPresets(), - mutator.DefaultQueueing(), - mutator.ExpandPipelineGlobPaths(), + mutator.MergeJobClusters(), + mutator.MergeJobParameters(), + mutator.MergeJobTasks(), + mutator.MergePipelineClusters(), + mutator.MergeApps(), - // Configure use of WSFS for reads if the CLI is running on Databricks. - mutator.ConfigureWSFS(), + mutator.CaptureSchemaDependency(), - mutator.TranslatePaths(), - trampoline.WrapperWarning(), + // Provide permission config errors & warnings after initializing all variables + permissions.PermissionDiagnostics(), + mutator.SetRunAs(), + mutator.OverrideCompute(), + mutator.ConfigureDashboardDefaults(), + mutator.ConfigureVolumeDefaults(), + mutator.ProcessTargetMode(), + mutator.ApplyPresets(), + mutator.DefaultQueueing(), + mutator.ExpandPipelineGlobPaths(), - apps.Validate(), + // Configure use of WSFS for reads if the CLI is running on Databricks. 
+ mutator.ConfigureWSFS(), - permissions.ValidateSharedRootPermissions(), - permissions.ApplyBundlePermissions(), - permissions.FilterCurrentUser(), + mutator.TranslatePaths(), + trampoline.WrapperWarning(), - metadata.AnnotateJobs(), - metadata.AnnotatePipelines(), - terraform.Initialize(), - scripts.Execute(config.ScriptPostInit), - }, + apps.Validate(), + + permissions.ValidateSharedRootPermissions(), + permissions.ApplyBundlePermissions(), + permissions.FilterCurrentUser(), + + metadata.AnnotateJobs(), + metadata.AnnotatePipelines(), + terraform.Initialize(), + scripts.Execute(config.ScriptPostInit), ) } diff --git a/bundle/phases/load.go b/bundle/phases/load.go index fa0668775..844bc0776 100644 --- a/bundle/phases/load.go +++ b/bundle/phases/load.go @@ -1,29 +1,40 @@ package phases import ( + "context" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" ) // The load phase loads configuration from disk and performs // lightweight preprocessing (anything that can be done without network I/O). -func Load() bundle.Mutator { - return newPhase( - "load", - mutator.DefaultMutators(), - ) +func Load(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + log.Info(ctx, "Phase: load") + + return mutator.DefaultMutators(ctx, b) } -func LoadDefaultTarget() bundle.Mutator { - return newPhase( - "load", - append(mutator.DefaultMutators(), mutator.SelectDefaultTarget()), - ) +func LoadDefaultTarget(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + log.Info(ctx, "Phase: load") + + diags := mutator.DefaultMutators(ctx, b) + if diags.HasError() { + return diags + } + + return diags.Extend(bundle.Apply(ctx, b, mutator.SelectDefaultTarget())) } -func LoadNamedTarget(target string) bundle.Mutator { - return newPhase( - "load", - append(mutator.DefaultMutators(), mutator.SelectTarget(target)), - ) +func LoadNamedTarget(ctx context.Context, b *bundle.Bundle, target string) diag.Diagnostics { + log.Info(ctx, "Phase: load") + + diags := mutator.DefaultMutators(ctx, b) + if diags.HasError() { + return diags + } + + return diags.Extend(bundle.Apply(ctx, b, mutator.SelectTarget(target))) } diff --git a/bundle/phases/phase.go b/bundle/phases/phase.go deleted file mode 100644 index 1bb4f86a2..000000000 --- a/bundle/phases/phase.go +++ /dev/null @@ -1,33 +0,0 @@ -// Package phases defines build phases as logical groups of [bundle.Mutator] instances. -package phases - -import ( - "context" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" -) - -// This phase type groups mutators that belong to a lifecycle phase. -// It expands into the specific mutators when applied. 
-type phase struct { - name string - mutators []bundle.Mutator -} - -func newPhase(name string, mutators []bundle.Mutator) bundle.Mutator { - return &phase{ - name: name, - mutators: mutators, - } -} - -func (p *phase) Name() string { - return p.name -} - -func (p *phase) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - log.Infof(ctx, "Phase: %s", p.Name()) - return bundle.Apply(ctx, b, bundle.Seq(p.mutators...)) -} diff --git a/bundle/render/render_text_output_test.go b/bundle/render/render_text_output_test.go index 506756f70..d092e77c8 100644 --- a/bundle/render/render_text_output_test.go +++ b/bundle/render/render_text_output_test.go @@ -530,12 +530,12 @@ func TestRenderSummary(t *testing.T) { "pipeline2": { ID: "4", // no URL - PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline2-name"}, + CreatePipeline: &pipelines.CreatePipeline{Name: "pipeline2-name"}, }, "pipeline1": { - ID: "3", - URL: "https://url3", - PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1-name"}, + ID: "3", + URL: "https://url3", + CreatePipeline: &pipelines.CreatePipeline{Name: "pipeline1-name"}, }, }, Schemas: map[string]*resources.Schema{ diff --git a/bundle/resources/completion_test.go b/bundle/resources/completion_test.go index 80412b6f1..56559f18c 100644 --- a/bundle/resources/completion_test.go +++ b/bundle/resources/completion_test.go @@ -25,7 +25,7 @@ func TestCompletions_SkipDuplicates(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{}, + CreatePipeline: &pipelines.CreatePipeline{}, }, }, }, @@ -50,7 +50,7 @@ func TestCompletions_Filter(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "bar": { - PipelineSpec: &pipelines.PipelineSpec{}, + CreatePipeline: &pipelines.CreatePipeline{}, }, }, }, diff --git a/bundle/resources/lookup_test.go b/bundle/resources/lookup_test.go index 0ea5af7a2..d95da977a 100644 --- a/bundle/resources/lookup_test.go +++ b/bundle/resources/lookup_test.go @@ -56,7 +56,7 @@ func TestLookup_MultipleFound(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "foo": { - PipelineSpec: &pipelines.PipelineSpec{}, + CreatePipeline: &pipelines.CreatePipeline{}, }, }, }, @@ -107,7 +107,7 @@ func TestLookup_NominalWithFilters(t *testing.T) { }, Pipelines: map[string]*resources.Pipeline{ "bar": { - PipelineSpec: &pipelines.PipelineSpec{}, + CreatePipeline: &pipelines.CreatePipeline{}, }, }, }, diff --git a/bundle/run/app.go b/bundle/run/app.go index 11030beda..b15f3f4b6 100644 --- a/bundle/run/app.go +++ b/bundle/run/app.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/run/output" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/apps" "github.com/spf13/cobra" ) @@ -111,11 +112,21 @@ func (a *appRunner) start(ctx context.Context) error { // active and pending deployments fields (if any). If there are active or pending deployments, // we need to wait for them to complete before we can do the new deployment. // Otherwise, the new deployment will fail. - // Thus, we first wait for the active deployment to complete. 
- if startedApp.ActiveDeployment != nil && - startedApp.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { + err = waitForDeploymentToComplete(ctx, w, startedApp) + if err != nil { + return err + } + + logProgress(ctx, "App is started!") + return nil +} + +func waitForDeploymentToComplete(ctx context.Context, w *databricks.WorkspaceClient, app *apps.App) error { + // We first wait for the active deployment to complete. + if app.ActiveDeployment != nil && + app.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { logProgress(ctx, "Waiting for the active deployment to complete...") - _, err = w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, startedApp.ActiveDeployment.DeploymentId, 20*time.Minute, nil) + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.ActiveDeployment.DeploymentId, 20*time.Minute, nil) if err != nil { return err } @@ -123,17 +134,16 @@ func (a *appRunner) start(ctx context.Context) error { } // Then, we wait for the pending deployment to complete. - if startedApp.PendingDeployment != nil && - startedApp.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { + if app.PendingDeployment != nil && + app.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { logProgress(ctx, "Waiting for the pending deployment to complete...") - _, err = w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, startedApp.PendingDeployment.DeploymentId, 20*time.Minute, nil) + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.PendingDeployment.DeploymentId, 20*time.Minute, nil) if err != nil { return err } logProgress(ctx, "Pending deployment is completed!") } - logProgress(ctx, "App is started!") return nil } @@ -142,16 +152,38 @@ func (a *appRunner) deploy(ctx context.Context) error { b := a.bundle w := b.WorkspaceClient() + sourceCodePath := app.SourceCodePath wait, err := w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ AppName: app.Name, AppDeployment: &apps.AppDeployment{ Mode: apps.AppDeploymentModeSnapshot, - SourceCodePath: app.SourceCodePath, + SourceCodePath: sourceCodePath, }, }) // If deploy returns an error, then there's an active deployment in progress, wait for it to complete. + // For this we first need to get an app and its active and pending deployments and then wait for them.
if err != nil { - return err + app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name}) + if err != nil { + return fmt.Errorf("failed to get app %s: %w", app.Name, err) + } + + err = waitForDeploymentToComplete(ctx, w, app) + if err != nil { + return err + } + + // Now we can try to deploy the app again + wait, err = w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ + AppName: app.Name, + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: sourceCodePath, + }, + }) + if err != nil { + return err + } } _, err = wait.OnProgress(func(ad *apps.AppDeployment) { diff --git a/bundle/run/app_test.go b/bundle/run/app_test.go index 44ff698e5..e988988f4 100644 --- a/bundle/run/app_test.go +++ b/bundle/run/app_test.go @@ -1,8 +1,8 @@ package run import ( - "bytes" "context" + "errors" "os" "path/filepath" "testing" @@ -15,7 +15,6 @@ import ( "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/dyn" - "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/apps" @@ -75,14 +74,12 @@ func setupBundle(t *testing.T) (context.Context, *bundle.Bundle, *mocks.MockWork b.SetWorkpaceClient(mwc.WorkspaceClient) bundletest.SetLocation(b, "resources.apps.my_app", []dyn.Location{{File: "./databricks.yml"}}) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) - ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) + ctx := cmdio.MockDiscard(context.Background()) - diags := bundle.Apply(ctx, b, bundle.Seq( + diags := bundle.ApplySeq(ctx, b, mutator.DefineDefaultWorkspacePaths(), mutator.TranslatePaths(), - )) + ) require.Empty(t, diags) return ctx, b, mwc @@ -189,6 +186,69 @@ func TestAppRunWithAnActiveDeploymentInProgress(t *testing.T) { r.run(t) } +func TestAppDeployWithDeploymentInProgress(t *testing.T) { + ctx, b, mwc := setupBundle(t) + + appApi := mwc.GetMockAppsAPI() + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + }, nil).Once() + + wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{ + Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) { + return nil, nil + }, + } + + // First deployment fails + appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(nil, errors.New("deployment in progress")).Once() + + // After first deployment fails, we should get the app and wait for the deployment to complete + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + ActiveDeployment: &apps.AppDeployment{ + DeploymentId: "active_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateInProgress, + }, + }, + }, nil).Once() + + appApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil) + + // Second one should succeed + 
appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(wait, nil).Once() + + r := &testAppRunner{ + m: mwc, + b: b, + ctx: ctx, + } + r.run(t) +} + func TestStopApp(t *testing.T) { ctx, b, mwc := setupBundle(t) appsApi := mwc.GetMockAppsAPI() diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go index 72aecc887..daf6cf063 100644 --- a/bundle/run/job_test.go +++ b/bundle/run/job_test.go @@ -1,7 +1,6 @@ package run import ( - "bytes" "context" "testing" "time" @@ -159,8 +158,8 @@ func TestJobRunnerRestart(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) jobApi := m.GetMockJobsAPI() @@ -230,8 +229,8 @@ func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) jobApi := m.GetMockJobsAPI() diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index bdcf0f142..1cd6e8743 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -79,10 +79,10 @@ type pipelineRunner struct { } func (r *pipelineRunner) Name() string { - if r.pipeline == nil || r.pipeline.PipelineSpec == nil { + if r.pipeline == nil || r.pipeline.CreatePipeline == nil { return "" } - return r.pipeline.PipelineSpec.Name + return r.pipeline.CreatePipeline.Name } func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) { diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go index bfa0c5846..56d800d35 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,7 +1,6 @@ package run import ( - "bytes" "context" "testing" "time" @@ -75,8 +74,8 @@ func TestPipelineRunnerRestart(t *testing.T) { Host: "https://test.com", } b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ diff --git a/bundle/schema/embed_test.go b/bundle/schema/embed_test.go index 59f1458cb..03d2165e4 100644 --- a/bundle/schema/embed_test.go +++ b/bundle/schema/embed_test.go @@ -59,8 +59,8 @@ func TestJsonSchema(t *testing.T) { } providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider") - assert.Contains(t, providers.Enum, "gitHub") - assert.Contains(t, providers.Enum, "bitbucketCloud") - assert.Contains(t, providers.Enum, "gitHubEnterprise") - assert.Contains(t, providers.Enum, "bitbucketServer") + assert.Contains(t, providers.OneOf[0].Enum, "gitHub") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketCloud") + assert.Contains(t, providers.OneOf[0].Enum, 
"gitHubEnterprise") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketServer") } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 81ae1329f..4bfbd62fc 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -88,6 +88,10 @@ "description": { "$ref": "#/$defs/string" }, + "id": { + "description": "The unique identifier of the app.", + "$ref": "#/$defs/string" + }, "name": { "$ref": "#/$defs/string" }, @@ -160,7 +164,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nThree kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" }, "cluster_name": { @@ -258,7 +262,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The cluster resource defines an [all-purpose cluster](https://docs.databricks.com/api/workspace/clusters/create)." }, { "type": "string", @@ -321,7 +326,8 @@ "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The dashboard resource allows you to manage [AI/BI dashboards](https://docs.databricks.com/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [link](https://docs.databricks.com/dashboards/index.html)." }, { "type": "string", @@ -388,7 +394,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. 
You must declare dependent libraries in task settings.\nIf more than 100 job clusters are available, you can paginate through them using :method:jobs/get.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" }, "max_concurrent_runs": { @@ -407,6 +413,10 @@ "description": "Job-level parameter definitions", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition" }, + "performance_target": { + "description": "PerformanceTarget defines how performant or cost efficient the execution of run on serverless should be.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.PerformanceTarget" + }, "permissions": { "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" }, @@ -426,7 +436,7 @@ "$ref": "#/$defs/map/string" }, "tasks": { - "description": "A list of task specifications to be executed by this job.", + "description": "A list of task specifications to be executed by this job.\nIf more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" }, "timeout_seconds": { @@ -442,7 +452,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The job resource allows you to define [jobs and their corresponding tasks](https://docs.databricks.com/api/workspace/jobs/create) in your bundle. For information about jobs, see [link](https://docs.databricks.com/jobs/index.html). For a tutorial that uses a \u003cDABS\u003e template to create a job, see [link](https://docs.databricks.com/dev-tools/bundles/jobs-tutorial.html)." }, { "type": "string", @@ -487,7 +498,8 @@ "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The experiment resource allows you to define [MLflow experiments](https://docs.databricks.com/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [link](https://docs.databricks.com/mlflow/experiments.html)." }, { "type": "string", @@ -532,7 +544,8 @@ "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The model resource allows you to define [legacy models](https://docs.databricks.com/api/workspace/modelregistry/createmodel) in bundles. Databricks recommends you use \u003cUC\u003e [registered models](https://docs.databricks.com/dev-tools/bundles/reference.html#registered-model) instead." }, { "type": "string", @@ -546,7 +559,7 @@ "type": "object", "properties": { "ai_gateway": { - "description": "The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now.", + "description": "The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned throughput endpoints are currently supported.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayConfig" }, "config": { @@ -554,7 +567,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput" }, "name": { - "description": "The name of the serving endpoint. 
This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n", + "description": "The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.", "$ref": "#/$defs/string" }, "permissions": { @@ -575,9 +588,9 @@ }, "additionalProperties": false, "required": [ - "config", "name" - ] + ], + "markdownDescription": "The model_serving_endpoint resource allows you to define [model serving endpoints](https://docs.databricks.com/api/workspace/servingendpoints/create). See [link](https://docs.databricks.com/machine-learning/model-serving/manage-serving-endpoints.html)." }, { "type": "string", @@ -698,6 +711,9 @@ "description": "Restart window of this pipeline.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow" }, + "run_as": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RunAs" + }, "schema": { "description": "The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode.", "$ref": "#/$defs/string" @@ -719,7 +735,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The pipeline resource allows you to create \u003cDLT\u003e [pipelines](https://docs.databricks.com/api/workspace/pipelines/create). For information about pipelines, see [link](https://docs.databricks.com/delta-live-tables/index.html). For a tutorial that uses the \u003cDABS\u003e template to create a pipeline, see [link](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", @@ -793,7 +810,8 @@ "table_name", "assets_dir", "output_schema_name" - ] + ], + "markdownDescription": "The quality_monitor resource allows you to define a \u003cUC\u003e [table monitor](https://docs.databricks.com/api/workspace/qualitymonitors/create). For information about monitors, see [link](https://docs.databricks.com/machine-learning/model-serving/monitor-diagnose-endpoints.html)." }, { "type": "string", @@ -835,7 +853,8 @@ "catalog_name", "name", "schema_name" - ] + ], + "markdownDescription": "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](https://docs.databricks.com/api/workspace/registeredmodels/create), see [link](https://docs.databricks.com/machine-learning/manage-model-lifecycle/index.html)." }, { "type": "string", @@ -875,7 +894,8 @@ "required": [ "catalog_name", "name" - ] + ], + "markdownDescription": "The schema resource type allows you to define \u003cUC\u003e [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](https://docs.databricks.com/api/workspace/schemas/create) are available for the schema resource. 
For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, { "type": "string", @@ -920,7 +940,8 @@ "catalog_name", "name", "schema_name" - ] + ], + "markdownDescription": "The volume resource type allows you to define and create \u003cUC\u003e [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n- A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use \u003cDABS\u003e to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments.\n\n- Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)." }, { "type": "string", @@ -934,39 +955,51 @@ "type": "object", "properties": { "alert": { + "description": "The name of the alert for which to retrieve an ID.", "$ref": "#/$defs/string" }, "cluster": { + "description": "The name of the cluster for which to retrieve an ID.", "$ref": "#/$defs/string" }, "cluster_policy": { + "description": "The name of the cluster_policy for which to retrieve an ID.", "$ref": "#/$defs/string" }, "dashboard": { + "description": "The name of the dashboard for which to retrieve an ID.", "$ref": "#/$defs/string" }, "instance_pool": { + "description": "The name of the instance_pool for which to retrieve an ID.", "$ref": "#/$defs/string" }, "job": { + "description": "The name of the job for which to retrieve an ID.", "$ref": "#/$defs/string" }, "metastore": { + "description": "The name of the metastore for which to retrieve an ID.", "$ref": "#/$defs/string" }, "notification_destination": { + "description": "The name of the notification_destination for which to retrieve an ID.", "$ref": "#/$defs/string" }, "pipeline": { + "description": "The name of the pipeline for which to retrieve an ID.", "$ref": "#/$defs/string" }, "query": { + "description": "The name of the query for which to retrieve an ID.", "$ref": "#/$defs/string" }, "service_principal": { + "description": "The name of the service_principal for which to retrieve an ID.", "$ref": "#/$defs/string" }, "warehouse": { + "description": "The name of the warehouse for which to retrieve an ID.", "$ref": "#/$defs/string" } }, @@ -984,6 +1017,7 @@ "type": "object", "properties": { "default": { + "description": "The default value for the variable.", "$ref": "#/$defs/interface" }, "description": { @@ -1006,8 +1040,10 @@ }, "variable.Variable": { "type": "object", + "description": "Defines a custom variable for the bundle.", "properties": { "default": { + "description": "The default value for the variable.", "$ref": "#/$defs/interface" }, "description": { @@ -1017,14 +1053,15 @@ "lookup": { "description": "The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.Lookup", - "markdownDescription": "The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, 
`query`, `service_principal`, or `warehouse` object for which to retrieve an ID.\"" + "markdownDescription": "The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID." }, "type": { "description": "The type of the variable.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config/variable.VariableType" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "Defines a custom variable for the bundle. See [variables](https://docs.databricks.com/dev-tools/bundles/settings.html#variables)." }, "variable.VariableType": { "type": "string" @@ -1036,26 +1073,25 @@ "type": "object", "properties": { "build": { - "description": "An optional set of non-default build commands that you want to run locally before deployment.\n\nFor Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment.\n\nTo specify multiple build commands, separate each command with double-ampersand (\u0026\u0026) characters.", + "description": "An optional set of build commands to run locally before deployment.", "$ref": "#/$defs/string" }, "executable": { - "description": "The executable type.", + "description": "The executable type. Valid values are `bash`, `sh`, and `cmd`.", "$ref": "#/$defs/github.com/databricks/cli/libs/exec.ExecutableType" }, "files": { - "description": "The source files for the artifact.", - "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile", - "markdownDescription": "The source files for the artifact, defined as an [artifact_file](https://docs.databricks.com/dev-tools/bundles/reference.html#artifact_file)." + "description": "The relative or absolute path to the built artifact files.", + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config.ArtifactFile" }, "path": { - "description": "The location where the built artifact will be saved.", + "description": "The local path of the directory for the artifact.", "$ref": "#/$defs/string" }, "type": { - "description": "The type of the artifact.", + "description": "Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.ArtifactType", - "markdownDescription": "The type of the artifact. Valid values are `wheel` or `jar`" + "markdownDescription": "Required if the artifact is a Python wheel. The type of the artifact. Valid values are `whl` and `jar`." } }, "additionalProperties": false, @@ -1075,7 +1111,7 @@ "type": "object", "properties": { "source": { - "description": "The path of the files used to build the artifact.", + "description": "Required. The artifact source file.", "$ref": "#/$defs/string" } }, @@ -1104,6 +1140,7 @@ "markdownDescription": "The ID of a cluster to use to run the bundle. See [cluster_id](https://docs.databricks.com/dev-tools/bundles/settings.html#cluster_id)." }, "compute_id": { + "description": "Deprecated. The ID of the compute to use to run the bundle.", "$ref": "#/$defs/string" }, "databricks_cli_version": { @@ -1114,18 +1151,19 @@ "deployment": { "description": "The definition of the bundle deployment", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment", - "markdownDescription": "The definition of the bundle deployment. 
For supported attributes, see [deployment](https://docs.databricks.com/dev-tools/bundles/reference.html#deployment) and [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." + "markdownDescription": "The definition of the bundle deployment. For supported attributes see [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." }, "git": { "description": "The Git version control details that are associated with your bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git) and [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." + "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes see [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, "name": { "description": "The name of the bundle.", "$ref": "#/$defs/string" }, "uuid": { + "description": "Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command).", "$ref": "#/$defs/string" } }, @@ -1154,8 +1192,7 @@ }, "lock": { "description": "The deployment lock attributes.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock", - "markdownDescription": "The deployment lock attributes. See [lock](https://docs.databricks.com/dev-tools/bundles/reference.html#lock)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Lock" } }, "additionalProperties": false @@ -1180,15 +1217,15 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Python" }, "python_wheel_wrapper": { - "description": "Whether to use a Python wheel wrapper", + "description": "Whether to use a Python wheel wrapper.", "$ref": "#/$defs/bool" }, "scripts": { - "description": "The commands to run", + "description": "The commands to run.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Command" }, "use_legacy_run_as": { - "description": "Whether to use the legacy run_as behavior", + "description": "Whether to use the legacy run_as behavior.", "$ref": "#/$defs/bool" } }, @@ -1349,60 +1386,64 @@ "type": "object", "properties": { "apps": { - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.App" + "description": "The app resource defines a Databricks app.", + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.App", + "markdownDescription": "The app resource defines a [Databricks app](https://docs.databricks.com/api/workspace/apps/create). For information about Databricks Apps, see [link](https://docs.databricks.com/dev-tools/databricks-apps/index.html)." }, "clusters": { - "description": "The cluster definitions for the bundle.", + "description": "The cluster definitions for the bundle, where each key is the name of a cluster.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Cluster", - "markdownDescription": "The cluster definitions for the bundle. See [cluster](https://docs.databricks.com/dev-tools/bundles/resources.html#cluster)" + "markdownDescription": "The cluster definitions for the bundle, where each key is the name of a cluster. See [clusters](https://docs.databricks.com/dev-tools/bundles/resources.html#clusters)." 
}, "dashboards": { - "description": "The dashboard definitions for the bundle.", + "description": "The dashboard definitions for the bundle, where each key is the name of the dashboard.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Dashboard", - "markdownDescription": "The dashboard definitions for the bundle. See [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)" + "markdownDescription": "The dashboard definitions for the bundle, where each key is the name of the dashboard. See [dashboards](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboards)." }, "experiments": { - "description": "The experiment definitions for the bundle.", + "description": "The experiment definitions for the bundle, where each key is the name of the experiment.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowExperiment", - "markdownDescription": "The experiment definitions for the bundle. See [experiment](https://docs.databricks.com/dev-tools/bundles/resources.html#experiment)" + "markdownDescription": "The experiment definitions for the bundle, where each key is the name of the experiment. See [experiments](https://docs.databricks.com/dev-tools/bundles/resources.html#experiments)." }, "jobs": { - "description": "The job definitions for the bundle.", + "description": "The job definitions for the bundle, where each key is the name of the job.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Job", - "markdownDescription": "The job definitions for the bundle. See [job](https://docs.databricks.com/dev-tools/bundles/resources.html#job)" + "markdownDescription": "The job definitions for the bundle, where each key is the name of the job. See [jobs](https://docs.databricks.com/dev-tools/bundles/resources.html#jobs)." }, "model_serving_endpoints": { - "description": "The model serving endpoint definitions for the bundle.", + "description": "The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint", - "markdownDescription": "The model serving endpoint definitions for the bundle. See [model_serving_endpoint](https://docs.databricks.com/dev-tools/bundles/resources.html#model_serving_endpoint)" + "markdownDescription": "The model serving endpoint definitions for the bundle, where each key is the name of the model serving endpoint. See [model_serving_endpoints](https://docs.databricks.com/dev-tools/bundles/resources.html#model_serving_endpoints)." }, "models": { - "description": "The model definitions for the bundle.", + "description": "The model definitions for the bundle, where each key is the name of the model.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.MlflowModel", - "markdownDescription": "The model definitions for the bundle. See [model](https://docs.databricks.com/dev-tools/bundles/resources.html#model)" + "markdownDescription": "The model definitions for the bundle, where each key is the name of the model. See [models](https://docs.databricks.com/dev-tools/bundles/resources.html#models)." 
}, "pipelines": { - "description": "The pipeline definitions for the bundle.", + "description": "The pipeline definitions for the bundle, where each key is the name of the pipeline.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Pipeline", - "markdownDescription": "The pipeline definitions for the bundle. See [pipeline](https://docs.databricks.com/dev-tools/bundles/resources.html#pipeline)" + "markdownDescription": "The pipeline definitions for the bundle, where each key is the name of the pipeline. See [pipelines](https://docs.databricks.com/dev-tools/bundles/resources.html#pipelines)." }, "quality_monitors": { - "description": "The quality monitor definitions for the bundle.", + "description": "The quality monitor definitions for the bundle, where each key is the name of the quality monitor.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.QualityMonitor", - "markdownDescription": "The quality monitor definitions for the bundle. See [quality_monitor](https://docs.databricks.com/dev-tools/bundles/resources.html#quality_monitor)" + "markdownDescription": "The quality monitor definitions for the bundle, where each key is the name of the quality monitor. See [quality_monitors](https://docs.databricks.com/dev-tools/bundles/resources.html#quality_monitors)." }, "registered_models": { - "description": "The registered model definitions for the bundle.", + "description": "The registered model definitions for the bundle, where each key is the name of the \u003cUC\u003e registered model.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.RegisteredModel", - "markdownDescription": "The registered model definitions for the bundle. See [registered_model](https://docs.databricks.com/dev-tools/bundles/resources.html#registered_model)" + "markdownDescription": "The registered model definitions for the bundle, where each key is the name of the \u003cUC\u003e registered model. See [registered_models](https://docs.databricks.com/dev-tools/bundles/resources.html#registered_models)." }, "schemas": { - "description": "The schema definitions for the bundle.", + "description": "The schema definitions for the bundle, where each key is the name of the schema.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Schema", - "markdownDescription": "The schema definitions for the bundle. See [schema](https://docs.databricks.com/dev-tools/bundles/resources.html#schema)" + "markdownDescription": "The schema definitions for the bundle, where each key is the name of the schema. See [schemas](https://docs.databricks.com/dev-tools/bundles/resources.html#schemas)." }, "volumes": { - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Volume" + "description": "The volume definitions for the bundle, where each key is the name of the volume.", + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Volume", + "markdownDescription": "The volume definitions for the bundle, where each key is the name of the volume. See [volumes](https://docs.databricks.com/dev-tools/bundles/resources.html#volumes)." } }, "additionalProperties": false @@ -1446,11 +1487,10 @@ "properties": { "artifacts": { "description": "The artifacts to include in the target deployment.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact", - "markdownDescription": "The artifacts to include in the target deployment. 
See [artifact](https://docs.databricks.com/dev-tools/bundles/reference.html#artifact)" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" }, "bundle": { - "description": "The name of the bundle when deploying to this target.", + "description": "The bundle attributes when deploying to this target.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle" }, "cluster_id": { @@ -1467,8 +1507,7 @@ }, "git": { "description": "The Git version control settings for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control settings for the target. See [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git" }, "mode": { "description": "The deployment mode for the target.", @@ -1477,38 +1516,32 @@ }, "permissions": { "description": "The permissions for deploying and running the bundle in the target.", - "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "The permissions for deploying and running the bundle in the target. See [permission](https://docs.databricks.com/dev-tools/bundles/reference.html#permission)." + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" }, "presets": { "description": "The deployment presets for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets", - "markdownDescription": "The deployment presets for the target. See [preset](https://docs.databricks.com/dev-tools/bundles/reference.html#preset)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Presets" }, "resources": { "description": "The resource definitions for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources", - "markdownDescription": "The resource definitions for the target. See [resources](https://docs.databricks.com/dev-tools/bundles/reference.html#resources)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources" }, "run_as": { "description": "The identity to use to run the bundle.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", - "markdownDescription": "The identity to use to run the bundle. See [job_run_as](https://docs.databricks.com/dev-tools/bundles/reference.html#job_run_as) and [link](https://docs.databricks.com/dev-tools/bundles/run_as.html)." + "markdownDescription": "The identity to use to run the bundle, see [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." }, "sync": { "description": "The local paths to sync to the target workspace when a bundle is run or deployed.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync", - "markdownDescription": "The local paths to sync to the target workspace when a bundle is run or deployed. See [sync](https://docs.databricks.com/dev-tools/bundles/reference.html#sync)." + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync" }, "variables": { "description": "The custom variable definitions for the target.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable", - "markdownDescription": "The custom variable definitions for the target. See [variables](https://docs.databricks.com/dev-tools/bundles/settings.html#variables) and [link](https://docs.databricks.com/dev-tools/bundles/variables.html)." 
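The target hunk above touches `mode`, `run_as`, `variables`, and `workspace`; a hedged sketch of a target entry combining them (the host and service principal ID are placeholders):

```yaml
# Illustrative target override; values are placeholders, not from this diff.
targets:
  prod:
    mode: production
    workspace:
      host: https://example.cloud.databricks.com
    run_as:
      service_principal_name: 00000000-0000-0000-0000-000000000000
    variables:
      catalog: prod_catalog                # overrides a bundle-level variable
```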
+ "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/variable.TargetVariable" }, "workspace": { "description": "The Databricks workspace for the target.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace", - "markdownDescription": "The Databricks workspace for the target. [workspace](https://docs.databricks.com/dev-tools/bundles/reference.html#workspace)" + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" } }, "additionalProperties": false @@ -1662,10 +1695,36 @@ ] }, "apps.AppDeploymentMode": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "SNAPSHOT", + "AUTO_SYNC" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.AppDeploymentState": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "SUCCEEDED", + "FAILED", + "IN_PROGRESS", + "CANCELLED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.AppDeploymentStatus": { "oneOf": [ @@ -1693,12 +1752,14 @@ "type": "object", "properties": { "description": { + "description": "Description of the App Resource.", "$ref": "#/$defs/string" }, "job": { "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob" }, "name": { + "description": "Name of the App Resource.", "$ref": "#/$defs/string" }, "secret": { @@ -1747,7 +1808,21 @@ ] }, "apps.AppResourceJobJobPermission": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "IS_OWNER", + "CAN_MANAGE_RUN", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.AppResourceSecret": { "oneOf": [ @@ -1778,7 +1853,21 @@ ] }, "apps.AppResourceSecretSecretPermission": { - "type": "string" + "oneOf": [ + { + "type": "string", + "description": "Permission to grant on the secret scope. 
Supported permissions are: \"READ\", \"WRITE\", \"MANAGE\".", + "enum": [ + "READ", + "WRITE", + "MANAGE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.AppResourceServingEndpoint": { "oneOf": [ @@ -1805,7 +1894,20 @@ ] }, "apps.AppResourceServingEndpointServingEndpointPermission": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_QUERY", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.AppResourceSqlWarehouse": { "oneOf": [ @@ -1832,10 +1934,37 @@ ] }, "apps.AppResourceSqlWarehouseSqlWarehousePermission": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_USE", + "IS_OWNER" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.ApplicationState": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "DEPLOYING", + "RUNNING", + "CRASHED", + "UNAVAILABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.ApplicationStatus": { "oneOf": [ @@ -1858,7 +1987,24 @@ ] }, "apps.ComputeState": { - "type": "string" + "oneOf": [ + { + "type": "string", + "enum": [ + "ERROR", + "DELETING", + "STARTING", + "STOPPING", + "UPDATING", + "STOPPED", + "ACTIVE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] }, "apps.ComputeStatus": { "oneOf": [ @@ -1869,6 +2015,7 @@ "$ref": "#/$defs/string" }, "state": { + "description": "State of the app compute.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ComputeState" } }, @@ -1911,11 +2058,19 @@ ] }, "catalog.MonitorCronSchedulePauseStatus": { - "type": "string", - "description": "Read only field that indicates whether a schedule is paused or not.", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "description": "Read only field that indicates whether a schedule is paused or not.", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorDataClassificationConfig": { @@ -2004,11 +2159,19 @@ ] }, "catalog.MonitorInferenceLogProblemType": { - "type": "string", - "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", - "enum": [ - "PROBLEM_TYPE_CLASSIFICATION", - "PROBLEM_TYPE_REGRESSION" + "oneOf": [ + { + "type": "string", + "description": "Problem type the model aims to solve. 
Determines the type of model-quality metrics that will be computed.", + "enum": [ + "PROBLEM_TYPE_CLASSIFICATION", + "PROBLEM_TYPE_REGRESSION" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorMetric": { @@ -2053,12 +2216,20 @@ ] }, "catalog.MonitorMetricType": { - "type": "string", - "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", - "enum": [ - "CUSTOM_METRIC_TYPE_AGGREGATE", - "CUSTOM_METRIC_TYPE_DERIVED", - "CUSTOM_METRIC_TYPE_DRIFT" + "oneOf": [ + { + "type": "string", + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", + "enum": [ + "CUSTOM_METRIC_TYPE_AGGREGATE", + "CUSTOM_METRIC_TYPE_DERIVED", + "CUSTOM_METRIC_TYPE_DRIFT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorNotifications": { @@ -2122,10 +2293,18 @@ ] }, "catalog.VolumeType": { - "type": "string", - "enum": [ - "EXTERNAL", - "MANAGED" + "oneOf": [ + { + "type": "string", + "enum": [ + "EXTERNAL", + "MANAGED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Adlsgen2Info": { @@ -2224,12 +2403,20 @@ ] }, "compute.AwsAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", - "enum": [ - "SPOT", - "ON_DEMAND", - "SPOT_WITH_FALLBACK" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", + "enum": [ + "SPOT", + "ON_DEMAND", + "SPOT_WITH_FALLBACK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.AzureAttributes": { @@ -2262,12 +2449,20 @@ ] }, "compute.AzureAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", - 
"enum": [ - "SPOT_AZURE", - "ON_DEMAND_AZURE", - "SPOT_WITH_FALLBACK_AZURE" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", + "enum": [ + "SPOT_AZURE", + "ON_DEMAND_AZURE", + "SPOT_WITH_FALLBACK_AZURE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.ClientsTypes": { @@ -2304,6 +2499,10 @@ "s3": { "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo" + }, + "volumes": { + "description": "destination needs to be provided. e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/catalog/schema/volume/cluster_log\" } }`", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo" } }, "additionalProperties": false @@ -2340,7 +2539,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes" }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nThree kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf" }, "cluster_name": { @@ -2444,19 +2643,27 @@ ] }, "compute.DataSecurityMode": { - "type": "string", - "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. 
Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", - "enum": [ - "DATA_SECURITY_MODE_AUTO", - "DATA_SECURITY_MODE_STANDARD", - "DATA_SECURITY_MODE_DEDICATED", - "NONE", - "SINGLE_USER", - "USER_ISOLATION", - "LEGACY_TABLE_ACL", - "LEGACY_PASSTHROUGH", - "LEGACY_SINGLE_USER", - "LEGACY_SINGLE_USER_STANDARD" + "oneOf": [ + { + "type": "string", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", + "enum": [ + "DATA_SECURITY_MODE_AUTO", + "DATA_SECURITY_MODE_STANDARD", + "DATA_SECURITY_MODE_DEDICATED", + "NONE", + "SINGLE_USER", + "USER_ISOLATION", + "LEGACY_TABLE_ACL", + "LEGACY_PASSTHROUGH", + "LEGACY_SINGLE_USER", + "LEGACY_SINGLE_USER_STANDARD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.DbfsStorageInfo": { @@ -2524,11 +2731,19 @@ ] }, "compute.EbsVolumeType": { - "type": "string", - "description": "The type of EBS volumes that will be launched with this cluster.", - "enum": [ - "GENERAL_PURPOSE_SSD", - "THROUGHPUT_OPTIMIZED_HDD" + "oneOf": [ + { + "type": "string", + "description": "The type of EBS volumes that will be launched with this cluster.", + "enum": [ + "GENERAL_PURPOSE_SSD", + "THROUGHPUT_OPTIMIZED_HDD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Environment": { @@ -2595,12 +2810,20 @@ ] }, "compute.GcpAvailability": { - "type": "string", - "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", - "enum": [ - "PREEMPTIBLE_GCP", - "ON_DEMAND_GCP", - "PREEMPTIBLE_WITH_FALLBACK_GCP" + "oneOf": [ + { + "type": "string", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "enum": [ + "PREEMPTIBLE_GCP", + "ON_DEMAND_GCP", + "PREEMPTIBLE_WITH_FALLBACK_GCP" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.GcsStorageInfo": { @@ -2834,12 +3057,20 @@ ] }, "compute.RuntimeEngine": { - "type": "string", - "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", - "enum": [ - "NULL", - "STANDARD", - "PHOTON" + "oneOf": [ + { + "type": "string", + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", + "enum": [ + "NULL", + "STANDARD", + "PHOTON" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + 
} ] }, "compute.S3StorageInfo": { @@ -2893,7 +3124,7 @@ "type": "object", "properties": { "destination": { - "description": "Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`", + "description": "Unity Catalog volumes file destination, e.g. `/Volumes/catalog/schema/volume/dir/file`", "$ref": "#/$defs/string" } }, @@ -2951,10 +3182,18 @@ ] }, "dashboards.LifecycleState": { - "type": "string", - "enum": [ - "ACTIVE", - "TRASHED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ACTIVE", + "TRASHED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.CleanRoomsNotebookTask": { @@ -2992,10 +3231,18 @@ ] }, "jobs.Condition": { - "type": "string", - "enum": [ - "ANY_UPDATED", - "ALL_UPDATED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ANY_UPDATED", + "ALL_UPDATED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.ConditionTask": { @@ -3030,15 +3277,23 @@ ] }, "jobs.ConditionTaskOp": { - "type": "string", - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", - "enum": [ - "EQUAL_TO", - "GREATER_THAN", - "GREATER_THAN_OR_EQUAL", - "LESS_THAN", - "LESS_THAN_OR_EQUAL", - "NOT_EQUAL" + "oneOf": [ + { + "type": "string", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. 
If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "enum": [ + "EQUAL_TO", + "GREATER_THAN", + "GREATER_THAN_OR_EQUAL", + "LESS_THAN", + "LESS_THAN_OR_EQUAL", + "NOT_EQUAL" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.Continuous": { @@ -3194,23 +3449,39 @@ ] }, "jobs.Format": { - "type": "string", - "enum": [ - "SINGLE_TASK", - "MULTI_TASK" + "oneOf": [ + { + "type": "string", + "enum": [ + "SINGLE_TASK", + "MULTI_TASK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitProvider": { - "type": "string", - "enum": [ - "gitHub", - "bitbucketCloud", - "azureDevOpsServices", - "gitHubEnterprise", - "bitbucketServer", - "gitLab", - "gitLabEnterpriseEdition", - "awsCodeCommit" + "oneOf": [ + { + "type": "string", + "enum": [ + "gitHub", + "bitbucketCloud", + "azureDevOpsServices", + "gitHubEnterprise", + "bitbucketServer", + "gitLab", + "gitLabEnterpriseEdition", + "awsCodeCommit" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitSnapshot": { @@ -3323,18 +3594,34 @@ ] }, "jobs.JobDeploymentKind": { - "type": "string", - "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEditMode": { - "type": "string", - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", - "enum": [ - "UI_LOCKED", - "EDITABLE" + "oneOf": [ + { + "type": "string", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "enum": [ + "UI_LOCKED", + "EDITABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEmailNotifications": { @@ -3454,7 +3741,7 @@ "description": "Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.\n\nEither `user_name` or `service_principal_name` should be specified. If not, an error is thrown.", "properties": { "service_principal_name": { - "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "description": "The application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", "$ref": "#/$defs/string" }, "user_name": { @@ -3502,29 +3789,53 @@ ] }, "jobs.JobSourceDirtyState": { - "type": "string", - "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. 
Import the remote job specification again from UI to make the job fully synced.", - "enum": [ - "NOT_SYNCED", - "DISCONNECTED" + "oneOf": [ + { + "type": "string", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "enum": [ + "NOT_SYNCED", + "DISCONNECTED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthMetric": { - "type": "string", - "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.", - "enum": [ - "RUN_DURATION_SECONDS", - "STREAMING_BACKLOG_BYTES", - "STREAMING_BACKLOG_RECORDS", - "STREAMING_BACKLOG_SECONDS", - "STREAMING_BACKLOG_FILES" + "oneOf": [ + { + "type": "string", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. 
This metric is in Public Preview.", + "enum": [ + "RUN_DURATION_SECONDS", + "STREAMING_BACKLOG_BYTES", + "STREAMING_BACKLOG_RECORDS", + "STREAMING_BACKLOG_SECONDS", + "STREAMING_BACKLOG_FILES" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthOperator": { - "type": "string", - "description": "Specifies the operator used to compare the health metric value with the specified threshold.", - "enum": [ - "GREATER_THAN" + "oneOf": [ + { + "type": "string", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "enum": [ + "GREATER_THAN" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthRule": { @@ -3608,10 +3919,34 @@ ] }, "jobs.PauseStatus": { - "type": "string", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "jobs.PerformanceTarget": { + "oneOf": [ + { + "type": "string", + "description": "PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be.\nThe performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager\n(see cluster-common PerformanceTarget).", + "enum": [ + "PERFORMANCE_OPTIMIZED", + "COST_OPTIMIZED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PeriodicTriggerConfiguration": { @@ -3641,11 +3976,19 @@ ] }, "jobs.PeriodicTriggerConfigurationTimeUnit": { - "type": "string", - "enum": [ - "HOURS", - "DAYS", - "WEEKS" + "oneOf": [ + { + "type": "string", + "enum": [ + "HOURS", + "DAYS", + "WEEKS" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PipelineParams": { @@ -3747,15 +4090,23 @@ ] }, "jobs.RunIf": { - "type": "string", - "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", - "enum": [ - "ALL_SUCCESS", - "ALL_DONE", - "NONE_FAILED", - "AT_LEAST_ONE_SUCCESS", - "ALL_FAILED", - "AT_LEAST_ONE_FAILED" + "oneOf": [ + { + "type": "string", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. 
When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", + "enum": [ + "ALL_SUCCESS", + "ALL_DONE", + "NONE_FAILED", + "AT_LEAST_ONE_SUCCESS", + "ALL_FAILED", + "AT_LEAST_ONE_FAILED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.RunJobTask": { @@ -3815,11 +4166,19 @@ ] }, "jobs.Source": { - "type": "string", - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", - "enum": [ - "WORKSPACE", - "GIT" + "oneOf": [ + { + "type": "string", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "enum": [ + "WORKSPACE", + "GIT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.SparkJarTask": { @@ -3838,6 +4197,10 @@ "parameters": { "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", "$ref": "#/$defs/slice/string" + }, + "run_as_repl": { + "description": "Deprecated. 
A value of `false` is no longer supported.", + "$ref": "#/$defs/bool" } }, "additionalProperties": false @@ -4519,12 +4882,20 @@ ] }, "ml.ModelVersionStatus": { - "type": "string", - "description": "Current status of `model_version`", - "enum": [ - "PENDING_REGISTRATION", - "FAILED_REGISTRATION", - "READY" + "oneOf": [ + { + "type": "string", + "description": "Current status of `model_version`", + "enum": [ + "PENDING_REGISTRATION", + "FAILED_REGISTRATION", + "READY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "ml.ModelVersionTag": { @@ -4569,11 +4940,40 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "string", + "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", + "enum": [ + "MONDAY", + "TUESDAY", + "WEDNESDAY", + "THURSDAY", + "FRIDAY", + "SATURDAY", + "SUNDAY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.DeploymentKind": { - "type": "string", - "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.FileLibrary": { @@ -4879,11 +5279,19 @@ ] }, "pipelines.PipelineClusterAutoscaleMode": { - "type": "string", - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n", - "enum": [ - "ENHANCED", - "LEGACY" + "oneOf": [ + { + "type": "string", + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.\n", + "enum": [ + "ENHANCED", + "LEGACY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.PipelineDeployment": { @@ -5003,7 +5411,7 @@ "properties": { "days_of_week": { "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" }, "start_hour": { "description": "An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day.\nContinuous pipeline restart is triggered only within a five-hour window starting at this hour.", @@ -5025,17 +5433,27 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "type": "string", - "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "enum": [ - "MONDAY", - "TUESDAY", - "WEDNESDAY", - "THURSDAY", - "FRIDAY", - "SATURDAY", - "SUNDAY" + "pipelines.RunAs": { + "oneOf": [ + { + "type": "object", + "description": "Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline.\n\nOnly `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown.", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "$ref": "#/$defs/string" + }, + "user_name": { + "description": "The email of an active workspace user. Users can only set this field to their own email.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.SchemaSpec": { @@ -5145,11 +5563,19 @@ ] }, "pipelines.TableSpecificConfigScdType": { - "type": "string", - "description": "The SCD type to use to ingest the table.", - "enum": [ - "SCD_TYPE_1", - "SCD_TYPE_2" + "oneOf": [ + { + "type": "string", + "description": "The SCD type to use to ingest the table.", + "enum": [ + "SCD_TYPE_1", + "SCD_TYPE_2" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Ai21LabsConfig": { @@ -5158,11 +5584,11 @@ "type": "object", "properties": { "ai21labs_api_key": { - "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.", + "description": "The Databricks secret key reference for an AI21 Labs API key. If you\nprefer to paste your API key directly, see `ai21labs_api_key_plaintext`.\nYou must provide an API key using one of the following fields:\n`ai21labs_api_key` or `ai21labs_api_key_plaintext`.", "$ref": "#/$defs/string" }, "ai21labs_api_key_plaintext": { - "description": "An AI21 Labs API key provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.", + "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `ai21labs_api_key`. You\nmust provide an API key using one of the following fields:\n`ai21labs_api_key` or `ai21labs_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -5184,7 +5610,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrails" }, "inference_table_config": { - "description": "Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality.", + "description": "Configuration for payload logging using inference tables.\nUse these tables to monitor and audit data being sent to and received from model APIs and to improve model quality.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayInferenceTableConfig" }, "rate_limits": { @@ -5192,7 +5618,7 @@ "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimit" }, "usage_tracking_config": { - "description": "Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs.", + "description": "Configuration to enable usage tracking using system tables.\nThese tables allow you to monitor operational usage on endpoints and their associated costs.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayUsageTrackingConfig" } }, @@ -5210,7 +5636,7 @@ "type": "object", "properties": { "invalid_keywords": { - "description": "List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.", + "description": "List of invalid keywords.\nAI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.", "$ref": "#/$defs/slice/string" }, "pii": { @@ -5222,7 +5648,7 @@ "$ref": "#/$defs/bool" }, "valid_topics": { - "description": "The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.", + "description": "The list of allowed topics.\nGiven a chat request, this guardrail flags the request if its topic is not in the allowed topics.", "$ref": "#/$defs/slice/string" } }, @@ -5240,14 +5666,11 @@ "type": "object", "properties": { "behavior": { - "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", + "description": "Configuration for input guardrail filters.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehaviorBehavior" } }, - "additionalProperties": false, - "required": [ - "behavior" - ] + "additionalProperties": false }, { "type": "string", @@ -5256,11 +5679,18 @@ ] }, "serving.AiGatewayGuardrailPiiBehaviorBehavior": { - "type": "string", - "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. 
If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", - "enum": [ - "NONE", - "BLOCK" + "oneOf": [ + { + "type": "string", + "enum": [ + "NONE", + "BLOCK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayGuardrails": { @@ -5291,7 +5721,7 @@ "type": "object", "properties": { "catalog_name": { - "description": "The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name.", + "description": "The name of the catalog in Unity Catalog. Required when enabling inference tables.\nNOTE: On update, you have to disable inference table first in order to change the catalog name.", "$ref": "#/$defs/string" }, "enabled": { @@ -5299,11 +5729,11 @@ "$ref": "#/$defs/bool" }, "schema_name": { - "description": "The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name.", + "description": "The name of the schema in Unity Catalog. Required when enabling inference tables.\nNOTE: On update, you have to disable inference table first in order to change the schema name.", "$ref": "#/$defs/string" }, "table_name_prefix": { - "description": "The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name.", + "description": "The prefix of the table in Unity Catalog.\nNOTE: On update, you have to disable inference table first in order to change the prefix name.", "$ref": "#/$defs/string" } }, @@ -5322,10 +5752,10 @@ "properties": { "calls": { "description": "Used to specify how many calls are allowed for a key within the renewal_period.", - "$ref": "#/$defs/int" + "$ref": "#/$defs/int64" }, "key": { - "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported,\nwith 'endpoint' being the default if not specified.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey" }, "renewal_period": { @@ -5346,18 +5776,32 @@ ] }, "serving.AiGatewayRateLimitKey": { - "type": "string", - "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayRateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a rate limit. 
Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayUsageTrackingConfig": { @@ -5384,11 +5828,11 @@ "type": "object", "properties": { "aws_access_key_id": { - "description": "The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "description": "The Databricks secret key reference for an AWS access key ID with\npermissions to interact with Bedrock services. If you prefer to paste\nyour API key directly, see `aws_access_key_id_plaintext`. You must provide an API\nkey using one of the following fields: `aws_access_key_id` or\n`aws_access_key_id_plaintext`.", "$ref": "#/$defs/string" }, "aws_access_key_id_plaintext": { - "description": "An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.", + "description": "An AWS access key ID with permissions to interact with Bedrock services\nprovided as a plaintext string. If you prefer to reference your key using\nDatabricks Secrets, see `aws_access_key_id`. You must provide an API key\nusing one of the following fields: `aws_access_key_id` or\n`aws_access_key_id_plaintext`.", "$ref": "#/$defs/string" }, "aws_region": { @@ -5396,15 +5840,15 @@ "$ref": "#/$defs/string" }, "aws_secret_access_key": { - "description": "The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "description": "The Databricks secret key reference for an AWS secret access key paired\nwith the access key ID, with permissions to interact with Bedrock\nservices. If you prefer to paste your API key directly, see\n`aws_secret_access_key_plaintext`. You must provide an API key using one\nof the following fields: `aws_secret_access_key` or\n`aws_secret_access_key_plaintext`.", "$ref": "#/$defs/string" }, "aws_secret_access_key_plaintext": { - "description": "An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.", + "description": "An AWS secret access key paired with the access key ID, with permissions\nto interact with Bedrock services provided as a plaintext string. If you\nprefer to reference your key using Databricks Secrets, see\n`aws_secret_access_key`. 
You must provide an API key using one of the\nfollowing fields: `aws_secret_access_key` or\n`aws_secret_access_key_plaintext`.", "$ref": "#/$defs/string" }, "bedrock_provider": { - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "description": "The underlying provider in Amazon Bedrock. Supported values (case\ninsensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider" } }, @@ -5421,13 +5865,20 @@ ] }, "serving.AmazonBedrockConfigBedrockProvider": { - "type": "string", - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", - "enum": [ - "anthropic", - "cohere", - "ai21labs", - "amazon" + "oneOf": [ + { + "type": "string", + "enum": [ + "anthropic", + "cohere", + "ai21labs", + "amazon" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AnthropicConfig": { @@ -5436,11 +5887,11 @@ "type": "object", "properties": { "anthropic_api_key": { - "description": "The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "description": "The Databricks secret key reference for an Anthropic API key. If you\nprefer to paste your API key directly, see `anthropic_api_key_plaintext`.\nYou must provide an API key using one of the following fields:\n`anthropic_api_key` or `anthropic_api_key_plaintext`.", "$ref": "#/$defs/string" }, "anthropic_api_key_plaintext": { - "description": "The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.", + "description": "The Anthropic API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `anthropic_api_key`. You\nmust provide an API key using one of the following fields:\n`anthropic_api_key` or `anthropic_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -5488,15 +5939,15 @@ "type": "object", "properties": { "cohere_api_base": { - "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n", + "description": "This is an optional field to provide a customized base URL for the Cohere\nAPI. If left unspecified, the standard Cohere base URL is used.", "$ref": "#/$defs/string" }, "cohere_api_key": { - "description": "The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "description": "The Databricks secret key reference for a Cohere API key. If you prefer\nto paste your API key directly, see `cohere_api_key_plaintext`. You must\nprovide an API key using one of the following fields: `cohere_api_key` or\n`cohere_api_key_plaintext`.", "$ref": "#/$defs/string" }, "cohere_api_key_plaintext": { - "description": "The Cohere API key provided as a plaintext string. 
If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.", + "description": "The Cohere API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `cohere_api_key`. You\nmust provide an API key using one of the following fields:\n`cohere_api_key` or `cohere_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -5514,15 +5965,15 @@ "type": "object", "properties": { "databricks_api_token": { - "description": "The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\nIf you prefer to paste your API key directly, see `databricks_api_token_plaintext`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "description": "The Databricks secret key reference for a Databricks API token that\ncorresponds to a user or service principal with Can Query access to the\nmodel serving endpoint pointed to by this external model. If you prefer\nto paste your API key directly, see `databricks_api_token_plaintext`. You\nmust provide an API key using one of the following fields:\n`databricks_api_token` or `databricks_api_token_plaintext`.", "$ref": "#/$defs/string" }, "databricks_api_token_plaintext": { - "description": "The Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.\nYou must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.\n", + "description": "The Databricks API token that corresponds to a user or service principal\nwith Can Query access to the model serving endpoint pointed to by this\nexternal model provided as a plaintext string. If you prefer to reference\nyour key using Databricks Secrets, see `databricks_api_token`. You must\nprovide an API key using one of the following fields:\n`databricks_api_token` or `databricks_api_token_plaintext`.", "$ref": "#/$defs/string" }, "databricks_workspace_url": { - "description": "The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n", + "description": "The URL of the Databricks workspace containing the model serving endpoint\npointed to by this external model.", "$ref": "#/$defs/string" } }, @@ -5543,19 +5994,19 @@ "type": "object", "properties": { "auto_capture_config": { - "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.", + "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.\nNote: this field is deprecated for creating new provisioned throughput endpoints,\nor updating existing provisioned throughput endpoints that never have inference table configured;\nin these cases please use AI Gateway to manage inference tables.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput" }, "served_entities": { - "description": "A list of served entities for the endpoint to serve. 
A serving endpoint can have up to 15 served entities.", + "description": "The list of served entities under the serving endpoint config.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput" }, "served_models": { - "description": "(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.", + "description": "(Deprecated, use served_entities instead) The list of served models under the serving endpoint config.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput" }, "traffic_config": { - "description": "The traffic config defining how invocations to the serving endpoint should be routed.", + "description": "The traffic configuration associated with the serving endpoint config.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.TrafficConfig" } }, @@ -5634,7 +6085,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig" }, "provider": { - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", + "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider" }, "task": { @@ -5656,17 +6107,24 @@ ] }, "serving.ExternalModelProvider": { - "type": "string", - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", - "enum": [ - "ai21labs", - "anthropic", - "amazon-bedrock", - "cohere", - "databricks-model-serving", - "google-cloud-vertex-ai", - "openai", - "palm" + "oneOf": [ + { + "type": "string", + "enum": [ + "ai21labs", + "anthropic", + "amazon-bedrock", + "cohere", + "databricks-model-serving", + "google-cloud-vertex-ai", + "openai", + "palm" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.GoogleCloudVertexAiConfig": { @@ -5675,23 +6133,27 @@ "type": "object", "properties": { "private_key": { - "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`", + "description": "The Databricks secret key reference for a private key for the service\naccount which has access to the Google Cloud Vertex AI Service. See [Best\npractices for managing service account keys]. If you prefer to paste your\nAPI key directly, see `private_key_plaintext`. 
You must provide an API\nkey using one of the following fields: `private_key` or\n`private_key_plaintext`\n\n[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys", "$ref": "#/$defs/string" }, "private_key_plaintext": { - "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`.", + "description": "The private key for the service account which has access to the Google\nCloud Vertex AI Service provided as a plaintext secret. See [Best\npractices for managing service account keys]. If you prefer to reference\nyour key using Databricks Secrets, see `private_key`. You must provide an\nAPI key using one of the following fields: `private_key` or\n`private_key_plaintext`.\n\n[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys", "$ref": "#/$defs/string" }, "project_id": { - "description": "This is the Google Cloud project id that the service account is associated with.", + "description": "This is the Google Cloud project id that the service account is\nassociated with.", "$ref": "#/$defs/string" }, "region": { - "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions.", + "description": "This is the region for the Google Cloud Vertex AI Service. See [supported\nregions] for more details. Some models are only available in specific\nregions.\n\n[supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations", "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "required": [ + "project_id", + "region" + ] }, { "type": "string", @@ -5703,49 +6165,50 @@ "oneOf": [ { "type": "object", + "description": "Configs needed to create an OpenAI model route.", "properties": { "microsoft_entra_client_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n", + "description": "This field is only required for Azure AD OpenAI and is the Microsoft\nEntra Client ID.", "$ref": "#/$defs/string" }, "microsoft_entra_client_secret": { - "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n", + "description": "The Databricks secret key reference for a client secret used for\nMicrosoft Entra ID authentication. If you prefer to paste your client\nsecret directly, see `microsoft_entra_client_secret_plaintext`. 
You must\nprovide an API key using one of the following fields:\n`microsoft_entra_client_secret` or\n`microsoft_entra_client_secret_plaintext`.", "$ref": "#/$defs/string" }, "microsoft_entra_client_secret_plaintext": { - "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n", + "description": "The client secret used for Microsoft Entra ID authentication provided as\na plaintext string. If you prefer to reference your key using Databricks\nSecrets, see `microsoft_entra_client_secret`. You must provide an API key\nusing one of the following fields: `microsoft_entra_client_secret` or\n`microsoft_entra_client_secret_plaintext`.", "$ref": "#/$defs/string" }, "microsoft_entra_tenant_id": { - "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n", + "description": "This field is only required for Azure AD OpenAI and is the Microsoft\nEntra Tenant ID.", "$ref": "#/$defs/string" }, "openai_api_base": { - "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n", + "description": "This is a field to provide a customized base URl for the OpenAI API. For\nAzure OpenAI, this field is required, and is the base URL for the Azure\nOpenAI API service provided by Azure. For other OpenAI API types, this\nfield is optional, and if left unspecified, the standard OpenAI base URL\nis used.", "$ref": "#/$defs/string" }, "openai_api_key": { - "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.", + "description": "The Databricks secret key reference for an OpenAI API key using the\nOpenAI or Azure service. If you prefer to paste your API key directly,\nsee `openai_api_key_plaintext`. You must provide an API key using one of\nthe following fields: `openai_api_key` or `openai_api_key_plaintext`.", "$ref": "#/$defs/string" }, "openai_api_key_plaintext": { - "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.", + "description": "The OpenAI API key using the OpenAI or Azure service provided as a\nplaintext string. If you prefer to reference your key using Databricks\nSecrets, see `openai_api_key`. You must provide an API key using one of\nthe following fields: `openai_api_key` or `openai_api_key_plaintext`.", "$ref": "#/$defs/string" }, "openai_api_type": { - "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. 
For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n", + "description": "This is an optional field to specify the type of OpenAI API to use. For\nAzure OpenAI, this field is required, and adjust this parameter to\nrepresent the preferred security access validation protocol. For access\ntoken validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.", "$ref": "#/$defs/string" }, "openai_api_version": { - "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n", + "description": "This is an optional field to specify the OpenAI API version. For Azure\nOpenAI, this field is required, and is the version of the Azure OpenAI\nservice to utilize, specified by a date.", "$ref": "#/$defs/string" }, "openai_deployment_name": { - "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n", + "description": "This field is only required for Azure OpenAI and is the name of the\ndeployment resource for the Azure OpenAI service.", "$ref": "#/$defs/string" }, "openai_organization": { - "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n", + "description": "This is an optional field to specify the organization in OpenAI or Azure\nOpenAI.", "$ref": "#/$defs/string" } }, @@ -5763,11 +6226,11 @@ "type": "object", "properties": { "palm_api_key": { - "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.", + "description": "The Databricks secret key reference for a PaLM API key. If you prefer to\npaste your API key directly, see `palm_api_key_plaintext`. You must\nprovide an API key using one of the following fields: `palm_api_key` or\n`palm_api_key_plaintext`.", "$ref": "#/$defs/string" }, "palm_api_key_plaintext": { - "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.", + "description": "The PaLM API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `palm_api_key`. You must\nprovide an API key using one of the following fields: `palm_api_key` or\n`palm_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -5786,7 +6249,7 @@ "properties": { "calls": { "description": "Used to specify how many calls are allowed for a key within the renewal_period.", - "$ref": "#/$defs/int" + "$ref": "#/$defs/int64" }, "key": { "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", @@ -5810,18 +6273,32 @@ ] }, "serving.RateLimitKey": { - "type": "string", - "description": "Key field for a serving endpoint rate limit. 
Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.RateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Route": { @@ -5856,19 +6333,18 @@ "type": "object", "properties": { "entity_name": { - "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n", + "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**.", "$ref": "#/$defs/string" }, "entity_version": { - "description": "The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.", "$ref": "#/$defs/string" }, "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", "$ref": "#/$defs/map/string" }, "external_model": { - "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)\ncan be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,\nit cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.\nThe task type of all external models within an endpoint must be the same.\n", + "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. 
If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ExternalModel" }, "instance_profile_arn": { @@ -5884,7 +6360,7 @@ "$ref": "#/$defs/int" }, "name": { - "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to \u003centity-name\u003e-\u003centity-version\u003e.\n", + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.", "$ref": "#/$defs/string" }, "scale_to_zero_enabled": { @@ -5892,12 +6368,12 @@ "$ref": "#/$defs/bool" }, "workload_size": { - "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n", + "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.", "$ref": "#/$defs/string" }, "workload_type": { - "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", - "$ref": "#/$defs/string" + "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is \"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. 
See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServingModelWorkloadType" } }, "additionalProperties": false @@ -5914,11 +6390,11 @@ "type": "object", "properties": { "environment_vars": { - "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", "$ref": "#/$defs/map/string" }, "instance_profile_arn": { - "description": "ARN of the instance profile that the served model will use to access AWS resources.", + "description": "ARN of the instance profile that the served entity uses to access AWS resources.", "$ref": "#/$defs/string" }, "max_provisioned_throughput": { @@ -5930,27 +6406,25 @@ "$ref": "#/$defs/int" }, "model_name": { - "description": "The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,\nin the form of __catalog_name__.__schema_name__.__model_name__.\n", "$ref": "#/$defs/string" }, "model_version": { - "description": "The version of the model in Databricks Model Registry or Unity Catalog to be served.", "$ref": "#/$defs/string" }, "name": { - "description": "The name of a served model. It must be unique across an endpoint. If not specified, this field will default to \u003cmodel-name\u003e-\u003cmodel-version\u003e.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n", + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.", "$ref": "#/$defs/string" }, "scale_to_zero_enabled": { - "description": "Whether the compute resources for the served model should scale down to zero.", + "description": "Whether the compute resources for the served entity should scale down to zero.", "$ref": "#/$defs/bool" }, "workload_size": { - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", + "description": "The workload size of the served entity. 
The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize" }, "workload_type": { - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is \"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).", "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType" } }, @@ -5968,23 +6442,55 @@ ] }, "serving.ServedModelInputWorkloadSize": { - "type": "string", - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", - "enum": [ - "Small", - "Medium", - "Large" + "oneOf": [ + { + "type": "string", + "enum": [ + "Small", + "Medium", + "Large" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.ServedModelInputWorkloadType": { - "type": "string", - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", - "enum": [ - "CPU", - "GPU_SMALL", - "GPU_MEDIUM", - "GPU_LARGE", - "MULTIGPU_MEDIUM" + "oneOf": [ + { + "type": "string", + "enum": [ + "CPU", + "GPU_MEDIUM", + "GPU_SMALL", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "serving.ServingModelWorkloadType": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CPU", + "GPU_MEDIUM", + "GPU_SMALL", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.TrafficConfig": { @@ -6619,6 +7125,20 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.IngestionConfig": { "oneOf": [ { @@ -6675,20 +7195,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "oneOf": [ - { - "type": "array", - "items": { - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" - } - }, - { - "type": "string", - "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" - } - ] - }, "serving.AiGatewayRateLimit": { "oneOf": [ { @@ -6800,12 +7306,13 @@ "properties": { "artifacts": { "description": "Defines the attributes to build an artifact", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Artifact", + "markdownDescription": "Defines the attributes to build artifacts, where each key is the name of the artifact, and the value is a Map that defines the artifact build settings. For information about the `artifacts` mapping, see [artifacts](https://docs.databricks.com/dev-tools/bundles/settings.html#artifacts).\n\nArtifact settings defined in the top level of the bundle configuration can be overridden in the `targets` mapping. See [link](https://docs.databricks.com/dev-tools/bundles/artifact-overrides.html)." }, "bundle": { - "description": "The attributes of the bundle.", + "description": "The bundle attributes when deploying to this target.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle", - "markdownDescription": "The attributes of the bundle. See [bundle](https://docs.databricks.com/dev-tools/bundles/settings.html#bundle)" + "markdownDescription": "The bundle attributes when deploying to this target," }, "experimental": { "description": "Defines attributes for experimental features.", @@ -6814,12 +7321,12 @@ "include": { "description": "Specifies a list of path globs that contain configuration files to include within the bundle.", "$ref": "#/$defs/slice/string", - "markdownDescription": "Specifies a list of path globs that contain configuration files to include within the bundle. See [include](https://docs.databricks.com/dev-tools/bundles/settings.html#include)" + "markdownDescription": "Specifies a list of path globs that contain configuration files to include within the bundle. See [include](https://docs.databricks.com/dev-tools/bundles/settings.html#include)." 
}, "permissions": { - "description": "Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle", + "description": "Defines a permission for a specific entity.", "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." + "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." }, "presets": { "description": "Defines bundle deployment presets.", @@ -6827,22 +7334,24 @@ "markdownDescription": "Defines bundle deployment presets. See [presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#presets)." }, "resources": { - "description": "Specifies information about the Databricks resources used by the bundle", + "description": "A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Resources", - "markdownDescription": "Specifies information about the Databricks resources used by the bundle. See [link](https://docs.databricks.com/dev-tools/bundles/resources.html)." + "markdownDescription": "A Map that defines the resources for the bundle, where each key is the name of the resource, and the value is a Map that defines the resource. For more information about \u003cDABS\u003e supported resources, and resource definition reference, see [link](https://docs.databricks.com/dev-tools/bundles/resources.html).\n\n```yaml\nresources:\n \u003cresource-type\u003e:\n \u003cresource-name\u003e:\n \u003cresource-field-name\u003e: \u003cresource-field-value\u003e\n```" }, "run_as": { - "description": "The identity to use to run the bundle.", - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs" + "description": "The identity to use when running \u003cDABS\u003e workflows.", + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs", + "markdownDescription": "The identity to use when running \u003cDABS\u003e workflows. See [link](https://docs.databricks.com/dev-tools/bundles/run-as.html)." }, "sync": { "description": "The files and file paths to include or exclude in the bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Sync", - "markdownDescription": "The files and file paths to include or exclude in the bundle. See [link](https://docs.databricks.com/dev-tools/bundles/)" + "markdownDescription": "The files and file paths to include or exclude in the bundle. See [sync](https://docs.databricks.com/dev-tools/bundles/settings.html#sync)." }, "targets": { "description": "Defines deployment targets for the bundle.", - "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target" + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target", + "markdownDescription": "Defines deployment targets for the bundle. 
See [targets](https://docs.databricks.com/dev-tools/bundles/settings.html#targets)" }, "variables": { "description": "A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable.", @@ -6850,8 +7359,9 @@ }, "workspace": { "description": "Defines the Databricks workspace for the bundle.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace", + "markdownDescription": "Defines the Databricks workspace for the bundle. See [workspace](https://docs.databricks.com/dev-tools/bundles/settings.html#workspace)." } }, - "additionalProperties": false + "additionalProperties": {} } \ No newline at end of file diff --git a/bundle/scripts/scripts_test.go b/bundle/scripts/scripts_test.go deleted file mode 100644 index 0c92bc2c3..000000000 --- a/bundle/scripts/scripts_test.go +++ /dev/null @@ -1,51 +0,0 @@ -package scripts - -import ( - "bufio" - "context" - "strings" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/exec" - "github.com/stretchr/testify/require" -) - -func TestExecutesHook(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Experimental: &config.Experimental{ - Scripts: map[config.ScriptHook]config.Command{ - config.ScriptPreBuild: "echo 'Hello'", - }, - }, - }, - } - - executor, err := exec.NewCommandExecutor(b.BundleRootPath) - require.NoError(t, err) - _, out, err := executeHook(context.Background(), executor, b, config.ScriptPreBuild) - require.NoError(t, err) - - reader := bufio.NewReader(out) - line, err := reader.ReadString('\n') - - require.NoError(t, err) - require.Equal(t, "Hello", strings.TrimSpace(line)) -} - -func TestExecuteMutator(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Experimental: &config.Experimental{ - Scripts: map[config.ScriptHook]config.Command{ - config.ScriptPreBuild: "echo 'Hello'", - }, - }, - }, - } - - diags := bundle.Apply(context.Background(), b, Execute(config.ScriptPreInit)) - require.NoError(t, diags.Error()) -} diff --git a/bundle/seq.go b/bundle/seq.go deleted file mode 100644 index c1260a3f0..000000000 --- a/bundle/seq.go +++ /dev/null @@ -1,30 +0,0 @@ -package bundle - -import ( - "context" - - "github.com/databricks/cli/libs/diag" -) - -type seqMutator struct { - mutators []Mutator -} - -func (s *seqMutator) Name() string { - return "seq" -} - -func (s *seqMutator) Apply(ctx context.Context, b *Bundle) diag.Diagnostics { - var diags diag.Diagnostics - for _, m := range s.mutators { - diags = diags.Extend(Apply(ctx, b, m)) - if diags.HasError() { - break - } - } - return diags -} - -func Seq(ms ...Mutator) Mutator { - return &seqMutator{mutators: ms} -} diff --git a/bundle/seq_test.go b/bundle/seq_test.go deleted file mode 100644 index 74f975ed8..000000000 --- a/bundle/seq_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package bundle - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSeqMutator(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - m3 := &testMutator{} - seqMutator := Seq(m1, m2, m3) - - b := &Bundle{} - diags := Apply(context.Background(), b, seqMutator) - assert.NoError(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, m3.applyCalled) -} - -func TestSeqWithDeferredMutator(t *testing.T) { - m1 := &testMutator{} - m2 := &testMutator{} - m3 := 
&testMutator{} - m4 := &testMutator{} - seqMutator := Seq(m1, Defer(m2, m3), m4) - - b := &Bundle{} - diags := Apply(context.Background(), b, seqMutator) - assert.NoError(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, m3.applyCalled) - assert.Equal(t, 1, m4.applyCalled) -} - -func TestSeqWithErrorAndDeferredMutator(t *testing.T) { - errorMut := &mutatorWithError{errorMsg: "error msg"} - m1 := &testMutator{} - m2 := &testMutator{} - m3 := &testMutator{} - seqMutator := Seq(errorMut, Defer(m1, m2), m3) - - b := &Bundle{} - diags := Apply(context.Background(), b, seqMutator) - assert.Error(t, diags.Error()) - - assert.Equal(t, 1, errorMut.applyCalled) - assert.Equal(t, 0, m1.applyCalled) - assert.Equal(t, 0, m2.applyCalled) - assert.Equal(t, 0, m3.applyCalled) -} - -func TestSeqWithErrorInsideDeferredMutator(t *testing.T) { - errorMut := &mutatorWithError{errorMsg: "error msg"} - m1 := &testMutator{} - m2 := &testMutator{} - m3 := &testMutator{} - seqMutator := Seq(m1, Defer(errorMut, m2), m3) - - b := &Bundle{} - diags := Apply(context.Background(), b, seqMutator) - assert.Error(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, errorMut.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 0, m3.applyCalled) -} - -func TestSeqWithErrorInsideFinallyStage(t *testing.T) { - errorMut := &mutatorWithError{errorMsg: "error msg"} - m1 := &testMutator{} - m2 := &testMutator{} - m3 := &testMutator{} - seqMutator := Seq(m1, Defer(m2, errorMut), m3) - - b := &Bundle{} - diags := Apply(context.Background(), b, seqMutator) - assert.Error(t, diags.Error()) - - assert.Equal(t, 1, m1.applyCalled) - assert.Equal(t, 1, m2.applyCalled) - assert.Equal(t, 1, errorMut.applyCalled) - assert.Equal(t, 0, m3.applyCalled) -} diff --git a/bundle/tests/apps_test.go b/bundle/tests/apps_test.go index 7fee60d14..c3a0da2ca 100644 --- a/bundle/tests/apps_test.go +++ b/bundle/tests/apps_test.go @@ -13,11 +13,10 @@ func TestApps(t *testing.T) { b := load(t, "./apps") assert.Equal(t, "apps", b.Config.Bundle.Name) - diags := bundle.Apply(context.Background(), b, - bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences("variables"), - )) + diags := bundle.ApplySeq(context.Background(), b, + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + ) assert.Empty(t, diags) app := b.Config.Resources.Apps["my_app"] @@ -37,11 +36,10 @@ func TestAppsOverride(t *testing.T) { b := loadTarget(t, "./apps", "development") assert.Equal(t, "apps", b.Config.Bundle.Name) - diags := bundle.Apply(context.Background(), b, - bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences("variables"), - )) + diags := bundle.ApplySeq(context.Background(), b, + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + ) assert.Empty(t, diags) app := b.Config.Resources.Apps["my_app"] assert.Equal(t, "my-app", app.Name) diff --git a/bundle/tests/environment_git_test.go b/bundle/tests/environment_git_test.go index 848b972b1..901d2867b 100644 --- a/bundle/tests/environment_git_test.go +++ b/bundle/tests/environment_git_test.go @@ -13,7 +13,6 @@ import ( func TestGitAutoLoadWithEnvironment(t *testing.T) { b := load(t, "./environments_autoload_git") bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) - assert.True(t, b.Config.Bundle.Git.Inferred) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, 
"/bricks") assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } @@ -21,7 +20,6 @@ func TestGitAutoLoadWithEnvironment(t *testing.T) { func TestGitManuallySetBranchWithEnvironment(t *testing.T) { b := loadTarget(t, "./environments_autoload_git", "production") bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) - assert.False(t, b.Config.Bundle.Git.Inferred) assert.Equal(t, "main", b.Config.Bundle.Git.Branch) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) diff --git a/bundle/tests/git_test.go b/bundle/tests/git_test.go index 41293e450..dd79e26a4 100644 --- a/bundle/tests/git_test.go +++ b/bundle/tests/git_test.go @@ -14,7 +14,6 @@ import ( func TestGitAutoLoad(t *testing.T) { b := load(t, "./autoload_git") bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) - assert.True(t, b.Config.Bundle.Git.Inferred) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } @@ -22,7 +21,6 @@ func TestGitAutoLoad(t *testing.T) { func TestGitManuallySetBranch(t *testing.T) { b := loadTarget(t, "./autoload_git", "production") bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) - assert.False(t, b.Config.Bundle.Git.Inferred) assert.Equal(t, "main", b.Config.Bundle.Git.Branch) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) @@ -36,7 +34,6 @@ func TestGitBundleBranchValidation(t *testing.T) { b := load(t, "./git_branch_validation") bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) - assert.False(t, b.Config.Bundle.Git.Inferred) assert.Equal(t, "feature-a", b.Config.Bundle.Git.Branch) assert.Equal(t, "feature-b", b.Config.Bundle.Git.ActualBranch) diff --git a/bundle/tests/include_test.go b/bundle/tests/include_test.go index 15f8fcec1..07ec4a775 100644 --- a/bundle/tests/include_test.go +++ b/bundle/tests/include_test.go @@ -17,7 +17,7 @@ func TestIncludeInvalid(t *testing.T) { ctx := context.Background() b, err := bundle.Load(ctx, "./include_invalid") require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.Load()) + diags := phases.Load(ctx, b) require.Error(t, diags.Error()) assert.ErrorContains(t, diags.Error(), "notexists.yml defined in 'include' section does not match any files") } diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 9b246b7cc..6748e6409 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -20,7 +20,7 @@ func load(t *testing.T, path string) *bundle.Bundle { ctx := context.Background() b, err := bundle.Load(ctx, path) require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.Load()) + diags := phases.Load(ctx, b) require.NoError(t, diags.Error()) return b } @@ -38,8 +38,9 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { return nil, diag.FromErr(err) } - diags := bundle.Apply(ctx, b, bundle.Seq( - phases.LoadNamedTarget(env), + diags := phases.LoadNamedTarget(ctx, b, env) + + diags = diags.Extend(bundle.ApplySeq(ctx, b, mutator.RewriteSyncPaths(), 
mutator.SyncDefaultPath(), mutator.SyncInferRoot(), @@ -69,10 +70,8 @@ func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diag configureMock(t, b) ctx := dbr.MockRuntime(context.Background(), false) - diags := bundle.Apply(ctx, b, bundle.Seq( - mutator.SelectTarget(env), - phases.Initialize(), - )) + diags := bundle.Apply(ctx, b, mutator.SelectTarget(env)) + diags = diags.Extend(phases.Initialize(ctx, b)) return b, diags } diff --git a/bundle/tests/path_translation_test.go b/bundle/tests/path_translation_test.go deleted file mode 100644 index 05702d2a2..000000000 --- a/bundle/tests/path_translation_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPathTranslationFallback(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - require.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 6) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationFallbackError(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} - -func TestPathTranslationNominal(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 8) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", 
filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - assert.Equal(t, "for_each_notebook_example", filepath.ToSlash(j.Tasks[6].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[6].ForEachTask.Task.NotebookTask.NotebookPath)) - - assert.Equal(t, "for_each_spark_python_example", filepath.ToSlash(j.Tasks[7].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[7].ForEachTask.Task.SparkPythonTask.PythonFile)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationNominalError(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} diff --git a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py index 
73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel_test.go b/bundle/tests/python_wheel_test.go index 06cb05270..22702ec44 100644 --- a/bundle/tests/python_wheel_test.go +++ b/bundle/tests/python_wheel_test.go @@ -18,7 +18,7 @@ func TestPythonWheelBuild(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel/my_test_code/dist/my_test_code-*.whl") @@ -34,7 +34,7 @@ func TestPythonWheelBuildAutoDetect(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel_no_artifact", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact/dist/my_test_code-*.whl") @@ -50,7 +50,7 @@ func TestPythonWheelBuildAutoDetectWithNotebookTask(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel_no_artifact_notebook", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact_notebook/dist/my_test_code-*.whl") @@ -66,7 +66,7 @@ func TestPythonWheelWithDBFSLib(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel_dbfs_lib", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) match := libraries.ExpandGlobReferences() @@ -78,7 +78,7 @@ func TestPythonWheelBuildNoBuildJustUpload(t *testing.T) { b 
:= loadTarget(t, "./python_wheel/python_wheel_no_artifact_no_setup", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) mockFiler := mockfiler.NewMockFiler(t) @@ -90,10 +90,10 @@ func TestPythonWheelBuildNoBuildJustUpload(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - diags = bundle.Apply(ctx, b, bundle.Seq( + diags = bundle.ApplySeq(ctx, b, libraries.ExpandGlobReferences(), libraries.UploadWithClient(mockFiler), - )) + ) require.NoError(t, diags.Error()) require.Empty(t, diags) require.Equal(t, "/Workspace/foo/bar/.internal/my_test_code-0.0.1-py3-none-any.whl", b.Config.Resources.Jobs["test_job"].JobSettings.Tasks[0].Libraries[0].Whl) @@ -103,7 +103,7 @@ func TestPythonWheelBuildWithEnvironmentKey(t *testing.T) { b := loadTarget(t, "./python_wheel/environment_key", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/environment_key/my_test_code/dist/my_test_code-*.whl") @@ -119,7 +119,7 @@ func TestPythonWheelBuildMultiple(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel_multiple", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) matches, err := filepath.Glob("./python_wheel/python_wheel_multiple/my_test_code/dist/my_test_code*.whl") @@ -135,7 +135,7 @@ func TestPythonWheelNoBuild(t *testing.T) { b := loadTarget(t, "./python_wheel/python_wheel_no_build", "default") ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Build()) + diags := phases.Build(ctx, b) require.NoError(t, diags.Error()) match := libraries.ExpandGlobReferences() diff --git a/bundle/tests/quality_monitor_test.go b/bundle/tests/quality_monitor_test.go deleted file mode 100644 index e95c7b7c1..000000000 --- a/bundle/tests/quality_monitor_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/catalog" - "github.com/stretchr/testify/assert" -) - -func assertExpectedMonitor(t *testing.T, p *resources.QualityMonitor) { - assert.Equal(t, "timestamp", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} - -func TestMonitorTableNames(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "development") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - assert.Equal(t, config.Development, b.Config.Bundle.Mode) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.dev", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.dev", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorStaging(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "staging") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.staging", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - 
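
The test updates above reflect an API change visible throughout this diff: bundle phases are now plain functions that take the context and bundle directly, and sequences of mutators go through `bundle.ApplySeq` instead of `bundle.Apply` with `bundle.Seq`. A minimal before/after sketch, using only signatures that appear in this diff and assuming `ApplySeq` is variadic over mutators as the call sites suggest:

```go
// Sketch only: old vs. new invocation of bundle phases, based on the call
// sites in this diff.
package example

import (
	"context"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/libraries"
	"github.com/databricks/cli/bundle/phases"
)

func buildAndExpand(ctx context.Context, b *bundle.Bundle) error {
	// Before: diags := bundle.Apply(ctx, b, phases.Build())
	// After:  phases are plain functions over (ctx, bundle).
	diags := phases.Build(ctx, b)
	if diags.HasError() {
		return diags.Error()
	}

	// Before: bundle.Apply(ctx, b, bundle.Seq(m1, m2))
	// After:  bundle.ApplySeq(ctx, b, m1, m2)
	diags = diags.Extend(bundle.ApplySeq(ctx, b, libraries.ExpandGlobReferences()))
	return diags.Error()
}
```
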
assert.Equal(t, "main.staging", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorProduction(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "production") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.prod", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.prod", p.OutputSchemaName) - - inferenceLog := p.InferenceLog - assert.Equal(t, []string{"1 day", "1 hour"}, inferenceLog.Granularities) - assert.Equal(t, "timestamp_prod", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction_prod", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id_prod", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go deleted file mode 100644 index 0f553ac3d..000000000 --- a/bundle/tests/relative_path_translation_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestRelativePathTranslationDefault(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "default") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile) -} - -func TestRelativePathTranslationOverride(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "override") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile) -} diff --git a/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep b/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep b/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes_test.go b/bundle/tests/relative_path_with_includes_test.go index 6e13628be..7249cac1f 100644 --- a/bundle/tests/relative_path_with_includes_test.go +++ b/bundle/tests/relative_path_with_includes_test.go @@ -17,8 +17,8 @@ func TestRelativePathsWithIncludes(t *testing.T) { diags := bundle.Apply(context.Background(), b, m) assert.NoError(t, diags.Error()) - assert.Equal(t, "artifact_a", b.Config.Artifacts["test_a"].Path) - assert.Equal(t, filepath.Join("subfolder", "artifact_b"), b.Config.Artifacts["test_b"].Path) + assert.Equal(t, b.SyncRootPath+"/artifact_a", b.Config.Artifacts["test_a"].Path) + assert.Equal(t, b.SyncRootPath+"/subfolder/artifact_b", b.Config.Artifacts["test_b"].Path) assert.ElementsMatch( t, @@ -37,6 +37,6 @@ func TestRelativePathsWithIncludes(t *testing.T) { b.Config.Sync.Exclude, ) - assert.Equal(t, filepath.Join("dist", "job_a.whl"), b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) - 
assert.Equal(t, filepath.Join("subfolder", "dist", "job_b.whl"), b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "dist/job_a.whl", b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "subfolder/dist/job_b.whl", b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) } diff --git a/bundle/tests/validate_test.go b/bundle/tests/validate_test.go index 9cd7c201b..a71b604b0 100644 --- a/bundle/tests/validate_test.go +++ b/bundle/tests/validate_test.go @@ -132,7 +132,7 @@ func TestValidateUniqueResourceIdentifiers(t *testing.T) { require.NoError(t, err) // The UniqueResourceKeys mutator is run as part of the Load phase. - diags := bundle.Apply(ctx, b, phases.Load()) + diags := phases.Load(ctx, b) assert.Equal(t, tc.diagnostics, diags) }) } diff --git a/bundle/trampoline/python_dbr_warning.go b/bundle/trampoline/python_dbr_warning.go index 0318df7c9..18fbbb353 100644 --- a/bundle/trampoline/python_dbr_warning.go +++ b/bundle/trampoline/python_dbr_warning.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn/dynvar" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "golang.org/x/mod/semver" @@ -60,11 +61,37 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { } if task.ExistingClusterId != "" { - version, err := getSparkVersionForCluster(ctx, b.WorkspaceClient(), task.ExistingClusterId) - // If there's error getting spark version for cluster, do not mark it as incompatible - if err != nil { - log.Warnf(ctx, "unable to get spark version for cluster %s, err: %s", task.ExistingClusterId, err.Error()) - return false + var version string + var err error + // If the cluster id is a variable and it's not resolved, it means it references a cluster defined in the same bundle. + // So we can get the version from the cluster definition. 
+ // It's defined in a form of resources.clusters..id + if strings.HasPrefix(task.ExistingClusterId, "${") { + p, ok := dynvar.PureReferenceToPath(task.ExistingClusterId) + if !ok || len(p) < 3 { + log.Warnf(ctx, "unable to parse cluster key from %s", task.ExistingClusterId) + return false + } + + if p[0].Key() != "resources" || p[1].Key() != "clusters" { + log.Warnf(ctx, "incorrect variable reference for cluster id %s", task.ExistingClusterId) + return false + } + + clusterKey := p[2].Key() + cluster, ok := b.Config.Resources.Clusters[clusterKey] + if !ok { + log.Warnf(ctx, "unable to find cluster with key %s", clusterKey) + return false + } + version = cluster.SparkVersion + } else { + version, err = getSparkVersionForCluster(ctx, b.WorkspaceClient(), task.ExistingClusterId) + // If there's error getting spark version for cluster, do not mark it as incompatible + if err != nil { + log.Warnf(ctx, "unable to get spark version for cluster %s, err: %s", task.ExistingClusterId, err.Error()) + return false + } } if lowerThanExpectedVersion(version) { @@ -82,7 +109,7 @@ func lowerThanExpectedVersion(sparkVersion string) bool { return false } - if parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99) + if len(parts[1]) > 0 && parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99) parts[1] = "99" } diff --git a/bundle/trampoline/python_dbr_warning_test.go b/bundle/trampoline/python_dbr_warning_test.go index d293c9477..96fac7329 100644 --- a/bundle/trampoline/python_dbr_warning_test.go +++ b/bundle/trampoline/python_dbr_warning_test.go @@ -346,6 +346,7 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { "13.x-rc-scala-2.12": false, "client.1.10-scala2.12": false, "latest-stable-gpu-scala2.11": false, + "1.": false, "10.4.x-aarch64-photon-scala2.12": true, "10.4.x-scala2.12": true, "13.0.x-scala2.12": true, diff --git a/bundle/trampoline/python_wheel.go b/bundle/trampoline/python_wheel.go index 075804479..0951b340c 100644 --- a/bundle/trampoline/python_wheel.go +++ b/bundle/trampoline/python_wheel.go @@ -8,8 +8,8 @@ import ( "strings" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/libs/diag" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -61,22 +61,30 @@ s = f.getvalue() dbutils.notebook.exit(s) ` +type transformWheelTask struct{} + +func (transformWheelTask) Name() string { + return "TransformWheelTask" +} + // This mutator takes the wheel task and transforms it into notebook // which installs uploaded wheels using %pip and then calling corresponding // entry point. 
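
The python_dbr_warning.go change above avoids a workspace API call when `existing_cluster_id` is an unresolved variable reference: the reference is parsed into a path and the Spark version is read from the cluster defined in the same bundle. A small sketch of that lookup, using only the helpers that appear in the hunk (the cluster key `my_cluster` is hypothetical):

```go
// Sketch of the local lookup performed when existing_cluster_id is an
// unresolved "${...}" reference, mirroring the hunk above.
package example

import (
	"strings"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/dyn/dynvar"
)

func sparkVersionFromReference(b *bundle.Bundle, existingClusterId string) (string, bool) {
	if !strings.HasPrefix(existingClusterId, "${") {
		// A literal cluster ID: the caller falls back to the workspace API.
		return "", false
	}

	// "${resources.clusters.my_cluster.id}" -> [resources, clusters, my_cluster, id]
	p, ok := dynvar.PureReferenceToPath(existingClusterId)
	if !ok || len(p) < 3 || p[0].Key() != "resources" || p[1].Key() != "clusters" {
		return "", false
	}

	cluster, ok := b.Config.Resources.Clusters[p[2].Key()]
	if !ok {
		return "", false
	}
	return cluster.SparkVersion, true
}
```

The related `lowerThanExpectedVersion` fix guards the minor-version check with `len(parts[1]) > 0`, so a malformed version such as `"1."` (which splits into `["1", ""]`) no longer indexes into an empty string; the new `"1.": false` test case covers exactly that input.
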
func TransformWheelTask() bundle.Mutator { - return bundle.If( - func(_ context.Context, b *bundle.Bundle) (bool, error) { - res := b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper - return res, nil - }, - NewTrampoline( - "python_wheel", - &pythonTrampoline{}, - NOTEBOOK_TEMPLATE, - ), - mutator.NoOp(), - ) + return transformWheelTask{} +} + +func (transformWheelTask) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + isEnabled := b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper + if !isEnabled { + return nil + } + + return bundle.Apply(ctx, b, NewTrampoline( + "python_wheel", + &pythonTrampoline{}, + NOTEBOOK_TEMPLATE, + )) } type pythonTrampoline struct{} diff --git a/cmd/account/budget-policy/budget-policy.go b/cmd/account/budget-policy/budget-policy.go new file mode 100755 index 000000000..fb9f8e5a6 --- /dev/null +++ b/cmd/account/budget-policy/budget-policy.go @@ -0,0 +1,366 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package budget_policy + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/billing" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "budget-policy", + Short: `A service serves REST API about Budget policies.`, + Long: `A service serves REST API about Budget policies`, + GroupID: "billing", + Annotations: map[string]string{ + "package": "billing", + }, + } + + // Add methods + cmd.AddCommand(newCreate()) + cmd.AddCommand(newDelete()) + cmd.AddCommand(newGet()) + cmd.AddCommand(newList()) + cmd.AddCommand(newUpdate()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start create command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var createOverrides []func( + *cobra.Command, + *billing.CreateBudgetPolicyRequest, +) + +func newCreate() *cobra.Command { + cmd := &cobra.Command{} + + var createReq billing.CreateBudgetPolicyRequest + var createJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + // TODO: array: custom_tags + cmd.Flags().StringVar(&createReq.PolicyName, "policy-name", createReq.PolicyName, `The name of the policy.`) + cmd.Flags().StringVar(&createReq.RequestId, "request-id", createReq.RequestId, `A unique identifier for this request.`) + + cmd.Use = "create" + cmd.Short = `Create a budget policy.` + cmd.Long = `Create a budget policy. 
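
The `TransformWheelTask` rewrite above drops the declarative `bundle.If`/`mutator.NoOp` combinator in favour of an ordinary mutator whose `Apply` decides at run time whether to do anything. A stripped-down sketch of that shape, with a hypothetical type name:

```go
// Sketch of the new shape: a plain mutator that short-circuits when the
// experimental flag is off, replacing the bundle.If/NoOp combinator.
package example

import (
	"context"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/diag"
)

type wheelWrapperMutator struct{}

func (wheelWrapperMutator) Name() string { return "WheelWrapperMutator" }

func (wheelWrapperMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
	enabled := b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper
	if !enabled {
		// Returning nil replaces the former mutator.NoOp() branch.
		return nil
	}
	// The real implementation delegates here to the python_wheel trampoline:
	//   return bundle.Apply(ctx, b, NewTrampoline("python_wheel", &pythonTrampoline{}, NOTEBOOK_TEMPLATE))
	return nil
}
```
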
+ + Creates a new policy.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + if cmd.Flags().Changed("json") { + diags := createJson.Unmarshal(&createReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } + + response, err := a.BudgetPolicy.Create(ctx, createReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range createOverrides { + fn(cmd, &createReq) + } + + return cmd +} + +// start delete command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var deleteOverrides []func( + *cobra.Command, + *billing.DeleteBudgetPolicyRequest, +) + +func newDelete() *cobra.Command { + cmd := &cobra.Command{} + + var deleteReq billing.DeleteBudgetPolicyRequest + + // TODO: short flags + + cmd.Use = "delete POLICY_ID" + cmd.Short = `Delete a budget policy.` + cmd.Long = `Delete a budget policy. + + Deletes a policy + + Arguments: + POLICY_ID: The Id of the policy.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + deleteReq.PolicyId = args[0] + + err = a.BudgetPolicy.Delete(ctx, deleteReq) + if err != nil { + return err + } + return nil + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range deleteOverrides { + fn(cmd, &deleteReq) + } + + return cmd +} + +// start get command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getOverrides []func( + *cobra.Command, + *billing.GetBudgetPolicyRequest, +) + +func newGet() *cobra.Command { + cmd := &cobra.Command{} + + var getReq billing.GetBudgetPolicyRequest + + // TODO: short flags + + cmd.Use = "get POLICY_ID" + cmd.Short = `Get a budget policy.` + cmd.Long = `Get a budget policy. + + Retrieves a policy by it's ID. 
+ + Arguments: + POLICY_ID: The Id of the policy.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + getReq.PolicyId = args[0] + + response, err := a.BudgetPolicy.Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getOverrides { + fn(cmd, &getReq) + } + + return cmd +} + +// start list command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var listOverrides []func( + *cobra.Command, + *billing.ListBudgetPoliciesRequest, +) + +func newList() *cobra.Command { + cmd := &cobra.Command{} + + var listReq billing.ListBudgetPoliciesRequest + + // TODO: short flags + + // TODO: complex arg: filter_by + cmd.Flags().IntVar(&listReq.PageSize, "page-size", listReq.PageSize, `The maximum number of budget policies to return.`) + cmd.Flags().StringVar(&listReq.PageToken, "page-token", listReq.PageToken, `A page token, received from a previous ListServerlessPolicies call.`) + // TODO: complex arg: sort_spec + + cmd.Use = "list" + cmd.Short = `List policies.` + cmd.Long = `List policies. + + Lists all policies. Policies are returned in the alphabetically ascending + order of their names.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + response := a.BudgetPolicy.List(ctx, listReq) + return cmdio.RenderIterator(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range listOverrides { + fn(cmd, &listReq) + } + + return cmd +} + +// start update command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var updateOverrides []func( + *cobra.Command, + *billing.UpdateBudgetPolicyRequest, +) + +func newUpdate() *cobra.Command { + cmd := &cobra.Command{} + + var updateReq billing.UpdateBudgetPolicyRequest + updateReq.Policy = &billing.BudgetPolicy{} + var updateJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + // TODO: complex arg: limit_config + + // TODO: array: custom_tags + cmd.Flags().StringVar(&updateReq.Policy.PolicyName, "policy-name", updateReq.Policy.PolicyName, `The name of the policy.`) + + cmd.Use = "update POLICY_ID" + cmd.Short = `Update a budget policy.` + cmd.Long = `Update a budget policy. + + Updates a policy + + Arguments: + POLICY_ID: The Id of the policy. 
This field is generated by Databricks and globally + unique.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + if cmd.Flags().Changed("json") { + diags := updateJson.Unmarshal(&updateReq.Policy) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } + updateReq.PolicyId = args[0] + + response, err := a.BudgetPolicy.Update(ctx, updateReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range updateOverrides { + fn(cmd, &updateReq) + } + + return cmd +} + +// end service BudgetPolicy diff --git a/cmd/account/cmd.go b/cmd/account/cmd.go index f34966fd9..758e2af5e 100644 --- a/cmd/account/cmd.go +++ b/cmd/account/cmd.go @@ -7,6 +7,7 @@ import ( account_access_control "github.com/databricks/cli/cmd/account/access-control" billable_usage "github.com/databricks/cli/cmd/account/billable-usage" + budget_policy "github.com/databricks/cli/cmd/account/budget-policy" budgets "github.com/databricks/cli/cmd/account/budgets" credentials "github.com/databricks/cli/cmd/account/credentials" custom_app_integration "github.com/databricks/cli/cmd/account/custom-app-integration" @@ -43,6 +44,7 @@ func New() *cobra.Command { cmd.AddCommand(account_access_control.New()) cmd.AddCommand(billable_usage.New()) + cmd.AddCommand(budget_policy.New()) cmd.AddCommand(credentials.New()) cmd.AddCommand(custom_app_integration.New()) cmd.AddCommand(encryption_keys.New()) diff --git a/cmd/account/custom-app-integration/custom-app-integration.go b/cmd/account/custom-app-integration/custom-app-integration.go index 1eec1018e..61cfe0a09 100755 --- a/cmd/account/custom-app-integration/custom-app-integration.go +++ b/cmd/account/custom-app-integration/custom-app-integration.go @@ -65,6 +65,7 @@ func newCreate() *cobra.Command { // TODO: array: redirect_urls // TODO: array: scopes // TODO: complex arg: token_access_policy + // TODO: array: user_authorized_scopes cmd.Use = "create" cmd.Short = `Create Custom OAuth App Integration.` @@ -307,7 +308,9 @@ func newUpdate() *cobra.Command { cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) // TODO: array: redirect_urls + // TODO: array: scopes // TODO: complex arg: token_access_policy + // TODO: array: user_authorized_scopes cmd.Use = "update INTEGRATION_ID" cmd.Short = `Updates Custom OAuth App Integration.` diff --git a/cmd/account/enable-ip-access-lists/enable-ip-access-lists.go b/cmd/account/enable-ip-access-lists/enable-ip-access-lists.go new file mode 100755 index 000000000..24d30c9c6 --- /dev/null +++ b/cmd/account/enable-ip-access-lists/enable-ip-access-lists.go @@ -0,0 +1,218 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. 
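
The generated budget-policy commands above follow the repo's usual pattern: each generated file exposes `*Overrides` slices so manually curated files in the same package can adjust commands from `init()` without editing generated code. A hypothetical override file (name and tweak are illustrative) might look like this:

```go
// override.go (hypothetical, manually curated — not generated).
package budget_policy

import (
	"github.com/databricks/databricks-sdk-go/service/billing"
	"github.com/spf13/cobra"
)

func init() {
	// Adjust the generated `create` command without touching budget-policy.go.
	createOverrides = append(createOverrides, func(cmd *cobra.Command, req *billing.CreateBudgetPolicyRequest) {
		cmd.Example = "  databricks account budget-policy create --policy-name my-policy"
	})
}
```
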
+ +package enable_ip_access_lists + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/settings" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "enable-ip-access-lists", + Short: `Controls the enforcement of IP access lists for accessing the account console.`, + Long: `Controls the enforcement of IP access lists for accessing the account console. + Allowing you to enable or disable restricted access based on IP addresses.`, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newDelete()) + cmd.AddCommand(newGet()) + cmd.AddCommand(newUpdate()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start delete command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var deleteOverrides []func( + *cobra.Command, + *settings.DeleteAccountIpAccessEnableRequest, +) + +func newDelete() *cobra.Command { + cmd := &cobra.Command{} + + var deleteReq settings.DeleteAccountIpAccessEnableRequest + + // TODO: short flags + + cmd.Flags().StringVar(&deleteReq.Etag, "etag", deleteReq.Etag, `etag used for versioning.`) + + cmd.Use = "delete" + cmd.Short = `Delete the account IP access toggle setting.` + cmd.Long = `Delete the account IP access toggle setting. + + Reverts the value of the account IP access toggle setting to default (ON)` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + response, err := a.Settings.EnableIpAccessLists().Delete(ctx, deleteReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range deleteOverrides { + fn(cmd, &deleteReq) + } + + return cmd +} + +// start get command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getOverrides []func( + *cobra.Command, + *settings.GetAccountIpAccessEnableRequest, +) + +func newGet() *cobra.Command { + cmd := &cobra.Command{} + + var getReq settings.GetAccountIpAccessEnableRequest + + // TODO: short flags + + cmd.Flags().StringVar(&getReq.Etag, "etag", getReq.Etag, `etag used for versioning.`) + + cmd.Use = "get" + cmd.Short = `Get the account IP access toggle setting.` + cmd.Long = `Get the account IP access toggle setting. 
+ + Gets the value of the account IP access toggle setting.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + response, err := a.Settings.EnableIpAccessLists().Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getOverrides { + fn(cmd, &getReq) + } + + return cmd +} + +// start update command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var updateOverrides []func( + *cobra.Command, + *settings.UpdateAccountIpAccessEnableRequest, +) + +func newUpdate() *cobra.Command { + cmd := &cobra.Command{} + + var updateReq settings.UpdateAccountIpAccessEnableRequest + var updateJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Use = "update" + cmd.Short = `Update the account IP access toggle setting.` + cmd.Long = `Update the account IP access toggle setting. + + Updates the value of the account IP access toggle setting.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustAccountClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + a := root.AccountClient(ctx) + + if cmd.Flags().Changed("json") { + diags := updateJson.Unmarshal(&updateReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } else { + return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + + response, err := a.Settings.EnableIpAccessLists().Update(ctx, updateReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range updateOverrides { + fn(cmd, &updateReq) + } + + return cmd +} + +// end service EnableIpAccessLists diff --git a/cmd/account/federation-policy/federation-policy.go b/cmd/account/federation-policy/federation-policy.go index d78ac709a..ad45c0405 100755 --- a/cmd/account/federation-policy/federation-policy.go +++ b/cmd/account/federation-policy/federation-policy.go @@ -71,9 +71,6 @@ func New() *cobra.Command { Annotations: map[string]string{ "package": "oauth2", }, - - // This service is being previewed; hide from help output. 
- Hidden: true, } // Add methods @@ -110,8 +107,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create" @@ -180,7 +178,10 @@ func newDelete() *cobra.Command { cmd.Use = "delete POLICY_ID" cmd.Short = `Delete account federation policy.` - cmd.Long = `Delete account federation policy.` + cmd.Long = `Delete account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -233,7 +234,10 @@ func newGet() *cobra.Command { cmd.Use = "get POLICY_ID" cmd.Short = `Get account federation policy.` - cmd.Long = `Get account federation policy.` + cmd.Long = `Get account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -338,25 +342,22 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update POLICY_ID UPDATE_MASK" + cmd.Use = "update POLICY_ID" cmd.Short = `Update account federation policy.` cmd.Long = `Update account federation policy. Arguments: - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. 
To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(2) + check := root.ExactArgs(1) return check(cmd, args) } @@ -378,7 +379,6 @@ func newUpdate() *cobra.Command { } } updateReq.PolicyId = args[0] - updateReq.UpdateMask = args[1] response, err := a.FederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go index 77f73bcd0..451523b7e 100755 --- a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go +++ b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go @@ -78,9 +78,6 @@ func New() *cobra.Command { Annotations: map[string]string{ "package": "oauth2", }, - - // This service is being previewed; hide from help output. - Hidden: true, } // Add methods @@ -117,8 +114,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create SERVICE_PRINCIPAL_ID" @@ -198,7 +196,7 @@ func newDelete() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -259,7 +257,7 @@ func newGet() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -376,26 +374,23 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID UPDATE_MASK" + cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID" cmd.Short = `Update service principal federation policy.` cmd.Long = `Update service principal federation policy. Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. 
Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(3) + check := root.ExactArgs(2) return check(cmd, args) } @@ -421,7 +416,6 @@ func newUpdate() *cobra.Command { return fmt.Errorf("invalid SERVICE_PRINCIPAL_ID: %s", args[0]) } updateReq.PolicyId = args[1] - updateReq.UpdateMask = args[2] response, err := a.ServicePrincipalFederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/account/settings/settings.go b/cmd/account/settings/settings.go index 9a9cd44bf..cd30743f7 100755 --- a/cmd/account/settings/settings.go +++ b/cmd/account/settings/settings.go @@ -7,6 +7,7 @@ import ( csp_enablement_account "github.com/databricks/cli/cmd/account/csp-enablement-account" disable_legacy_features "github.com/databricks/cli/cmd/account/disable-legacy-features" + enable_ip_access_lists "github.com/databricks/cli/cmd/account/enable-ip-access-lists" esm_enablement_account "github.com/databricks/cli/cmd/account/esm-enablement-account" personal_compute "github.com/databricks/cli/cmd/account/personal-compute" ) @@ -29,6 +30,7 @@ func New() *cobra.Command { // Add subservices cmd.AddCommand(csp_enablement_account.New()) cmd.AddCommand(disable_legacy_features.New()) + cmd.AddCommand(enable_ip_access_lists.New()) cmd.AddCommand(esm_enablement_account.New()) cmd.AddCommand(personal_compute.New()) diff --git a/cmd/api/api.go b/cmd/api/api.go index c3a3eb0b6..fad8a026f 100644 --- a/cmd/api/api.go +++ b/cmd/api/api.go @@ -62,7 +62,7 @@ func makeCommand(method string) *cobra.Command { var response any headers := map[string]string{"Content-Type": "application/json"} - err = api.Do(cmd.Context(), method, path, headers, request, &response) + err = api.Do(cmd.Context(), method, path, headers, nil, request, &response) if err != nil { return err } diff --git a/cmd/auth/token.go b/cmd/auth/token.go index fbf8b68f6..f3468df40 100644 --- a/cmd/auth/token.go +++ b/cmd/auth/token.go @@ -46,6 +46,10 @@ func newTokenCommand(persistentAuth *auth.PersistentAuth) *cobra.Command { cmd := &cobra.Command{ Use: "token [HOST]", Short: "Get authentication token", + Long: `Get authentication token from the local cache in ~/.databricks/token-cache.json. +Refresh the access token if it is expired. Note: This command only works with +U2M authentication (using the 'databricks auth login' command). 
M2M authentication +using a client ID and secret is not supported.`, } var tokenTimeout time.Duration diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 560b07e39..407a14d8d 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -69,14 +69,19 @@ func newDeployCommand() *cobra.Command { } } - diags = diags.Extend( - bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - validate.FastValidate(), - phases.Build(), - phases.Deploy(outputHandler), - )), - ) + diags = diags.Extend(phases.Initialize(ctx, b)) + + if !diags.HasError() { + diags = diags.Extend(bundle.Apply(ctx, b, validate.FastValidate())) + } + + if !diags.HasError() { + diags = diags.Extend(phases.Build(ctx, b)) + } + + if !diags.HasError() { + diags = diags.Extend(phases.Deploy(ctx, b, outputHandler)) + } } renderOpts := render.RenderOptions{RenderSummaryTable: false} diff --git a/cmd/bundle/deployment/bind.go b/cmd/bundle/deployment/bind.go index 71f441d3d..b11984c51 100644 --- a/cmd/bundle/deployment/bind.go +++ b/cmd/bundle/deployment/bind.go @@ -53,15 +53,15 @@ func newBindCommand() *cobra.Command { return nil }) - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - phases.Bind(&terraform.BindOptions{ + diags = phases.Initialize(ctx, b) + if !diags.HasError() { + diags = diags.Extend(phases.Bind(ctx, b, &terraform.BindOptions{ AutoApprove: autoApprove, ResourceType: resource.TerraformResourceName(), ResourceKey: args[0], ResourceId: args[1], - }), - )) + })) + } if err := diags.Error(); err != nil { return fmt.Errorf("failed to bind the resource, err: %w", err) } diff --git a/cmd/bundle/deployment/unbind.go b/cmd/bundle/deployment/unbind.go index 9de5285a5..3fe5fbce6 100644 --- a/cmd/bundle/deployment/unbind.go +++ b/cmd/bundle/deployment/unbind.go @@ -38,10 +38,10 @@ func newUnbindCommand() *cobra.Command { return nil }) - diags = bundle.Apply(cmd.Context(), b, bundle.Seq( - phases.Initialize(), - phases.Unbind(resource.TerraformResourceName(), args[0]), - )) + diags = phases.Initialize(ctx, b) + if !diags.HasError() { + diags = diags.Extend(phases.Unbind(ctx, b, resource.TerraformResourceName(), args[0])) + } if err := diags.Error(); err != nil { return err } diff --git a/cmd/bundle/destroy.go b/cmd/bundle/destroy.go index 0b2f14875..82580f994 100644 --- a/cmd/bundle/destroy.go +++ b/cmd/bundle/destroy.go @@ -61,20 +61,25 @@ func newDestroyCommand() *cobra.Command { return errors.New("please specify --auto-approve since selected logging format is json") } - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - // We need to resolve artifact variable (how we do it in build phase) - // because some of the to-be-destroyed resource might use this variable. - // Not resolving might lead to terraform "Reference to undeclared resource" error - mutator.ResolveVariableReferences( - "artifacts", - ), - phases.Destroy(), - )) + diags = phases.Initialize(ctx, b) if err := diags.Error(); err != nil { return err } - return nil + + diags = diags.Extend( + // We need to resolve artifact variable (how we do it in build phase) + // because some of the to-be-destroyed resource might use this variable. + // Not resolving might lead to terraform "Reference to undeclared resource" error + bundle.Apply(ctx, b, mutator.ResolveVariableReferences("artifacts")), + ) + + if err := diags.Error(); err != nil { + return err + } + + diags = diags.Extend(phases.Destroy(ctx, b)) + // QQQ we're not reporting warnings there. 
This would be addressed by switching to streaming warnings/errors instead of accumulating. + return diags.Error() } return cmd diff --git a/cmd/bundle/generate/app.go b/cmd/bundle/generate/app.go index 819b62b38..9dbd4fe46 100644 --- a/cmd/bundle/generate/app.go +++ b/cmd/bundle/generate/app.go @@ -36,8 +36,8 @@ func NewGenerateAppCommand() *cobra.Command { cmd.Flags().StringVar(&appName, "existing-app-name", "", `App name to generate config for`) cmd.MarkFlagRequired("existing-app-name") - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join("resources"), `Directory path where the output bundle config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join("src", "app"), `Directory path where the app files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Directory path where the output bundle config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src/app", `Directory path where the app files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index fa3c91b2a..92cd2f164 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -345,8 +345,12 @@ func (d *dashboard) initialize(b *bundle.Bundle) diag.Diagnostics { } func (d *dashboard) runForResource(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - diags := bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), + diags := phases.Initialize(ctx, b) + if diags.HasError() { + return diags + } + + diags = diags.Extend(bundle.ApplySeq(ctx, b, terraform.Interpolate(), terraform.Write(), terraform.StatePull(), @@ -441,8 +445,8 @@ func NewGenerateDashboardCommand() *cobra.Command { cmd.Flags().MarkHidden("existing-dashboard-id") // Output flags. - cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "./resources", `directory to write the configuration to`) - cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "./src", `directory to write the dashboard representation to`) + cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "resources", `directory to write the configuration to`) + cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "src", `directory to write the dashboard representation to`) cmd.Flags().BoolVarP(&d.force, "force", "f", false, `force overwrite existing files in the output directory`) // Exactly one of the lookup flags must be provided. diff --git a/cmd/bundle/generate/job.go b/cmd/bundle/generate/job.go index 827d270e5..438b235c9 100644 --- a/cmd/bundle/generate/job.go +++ b/cmd/bundle/generate/job.go @@ -32,13 +32,8 @@ func NewGenerateJobCommand() *cobra.Command { cmd.Flags().Int64Var(&jobId, "existing-job-id", 0, `Job ID of the job to generate config for`) cmd.MarkFlagRequired("existing-job-id") - wd, err := os.Getwd() - if err != nil { - wd = "." 
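
The deploy, bind, unbind, destroy, and dashboard changes above all spell out the same idiom: each phase's diagnostics are accumulated with `Extend`, and later phases run only while no error has been recorded. A generalized sketch of that gating, with an illustrative helper name and shape:

```go
// Sketch of the error-gated sequencing now written out explicitly in the
// bundle commands above.
package example

import (
	"context"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/diag"
)

func runPhases(ctx context.Context, b *bundle.Bundle, steps ...func(context.Context, *bundle.Bundle) diag.Diagnostics) diag.Diagnostics {
	var diags diag.Diagnostics
	for _, step := range steps {
		diags = diags.Extend(step(ctx, b))
		if diags.HasError() {
			break // warnings are kept, but no further phases run after an error
		}
	}
	return diags
}
```

With the function-style phases, `phases.Initialize` and `phases.Build` already fit this signature; `phases.Deploy` does not, since it also takes an output handler, which is presumably why deploy.go chains the calls by hand.
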
- } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -55,10 +50,22 @@ func NewGenerateJobCommand() *cobra.Command { } downloader := newDownloader(w, sourceDir, configDir) - for _, task := range job.Settings.Tasks { - err := downloader.MarkTaskForDownload(ctx, &task) - if err != nil { - return err + + // Don't download files if the job is using Git source + // When Git source is used, the job will be using the files from the Git repository + // but specific tasks might override this behaviour by using `source: WORKSPACE` setting. + // In this case, we don't want to download the files as well for these specific tasks + // because it leads to confusion with relative paths between workspace and GIT files. + // Instead we keep these tasks as is and let the user handle the files manually. + // The configuration will be deployable as tasks paths for source: WORKSPACE tasks will be absolute workspace paths. + if job.Settings.GitSource != nil { + cmdio.LogString(ctx, "Job is using Git source, skipping downloading files") + } else { + for _, task := range job.Settings.Tasks { + err := downloader.MarkTaskForDownload(ctx, &task) + if err != nil { + return err + } } } diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 863b0b2f7..9bf9e9947 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -32,13 +32,8 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd.Flags().StringVar(&pipelineId, "existing-pipeline-id", "", `ID of the pipeline to generate config for`) cmd.MarkFlagRequired("existing-pipeline-id") - wd, err := os.Getwd() - if err != nil { - wd = "." 
- } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -97,7 +92,7 @@ func NewGeneratePipelineCommand() *cobra.Command { } saver := yamlsaver.NewSaverWithStyle( - // Including all PipelineSpec and nested fields which are map[string]string type + // Including all CreatePipeline and nested fields which are map[string]string type map[string]yaml.Style{ "spark_conf": yaml.DoubleQuotedStyle, "custom_tags": yaml.DoubleQuotedStyle, diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index cbea0bfcc..c2c9bbb55 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -138,9 +138,7 @@ func (n *downloader) FlushToDisk(ctx context.Context, force bool) error { } errs, errCtx := errgroup.WithContext(ctx) - for k, v := range n.files { - targetPath := k - filePath := v + for targetPath, filePath := range n.files { errs.Go(func() error { reader, err := n.w.Workspace.Download(errCtx, filePath) if err != nil { diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..1911abe19 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -1,175 +1,15 @@ package bundle import ( - "context" "errors" "fmt" - "io/fs" - "os" - "path/filepath" - "slices" - "strings" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/dbr" - "github.com/databricks/cli/libs/filer" - "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) -var gitUrlPrefixes = []string{ - "https://", - "git@", -} - -type nativeTemplate struct { - name string - gitUrl string - description string - aliases []string - hidden bool -} - -const customTemplate = "custom..." 
- -var nativeTemplates = []nativeTemplate{ - { - name: "default-python", - description: "The default Python template for Notebooks / Delta Live Tables / Workflows", - }, - { - name: "default-sql", - description: "The default SQL template for .sql files that run with Databricks SQL", - }, - { - name: "dbt-sql", - description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - }, - { - name: "mlops-stacks", - gitUrl: "https://github.com/databricks/mlops-stacks", - description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", - aliases: []string{"mlops-stack"}, - }, - { - name: "default-pydabs", - gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", - hidden: true, - description: "The default PyDABs template", - }, - { - name: customTemplate, - description: "Bring your own template", - }, -} - -// Return template descriptions for command-line help -func nativeTemplateHelpDescriptions() string { - var lines []string - for _, template := range nativeTemplates { - if template.name != customTemplate && !template.hidden { - lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) - } - } - return strings.Join(lines, "\n") -} - -// Return template options for an interactive prompt -func nativeTemplateOptions() []cmdio.Tuple { - names := make([]cmdio.Tuple, 0, len(nativeTemplates)) - for _, template := range nativeTemplates { - if template.hidden { - continue - } - tuple := cmdio.Tuple{ - Name: template.name, - Id: template.description, - } - names = append(names, tuple) - } - return names -} - -func getNativeTemplateByDescription(description string) string { - for _, template := range nativeTemplates { - if template.description == description { - return template.name - } - } - return "" -} - -func getUrlForNativeTemplate(name string) string { - for _, template := range nativeTemplates { - if template.name == name { - return template.gitUrl - } - if slices.Contains(template.aliases, name) { - return template.gitUrl - } - } - return "" -} - -func getFsForNativeTemplate(name string) (fs.FS, error) { - builtin, err := template.Builtin() - if err != nil { - return nil, err - } - - // If this is a built-in template, the return value will be non-nil. - var templateFS fs.FS - for _, entry := range builtin { - if entry.Name == name { - templateFS = entry.FS - break - } - } - - return templateFS, nil -} - -func isRepoUrl(url string) bool { - result := false - for _, prefix := range gitUrlPrefixes { - if strings.HasPrefix(url, prefix) { - result = true - break - } - } - return result -} - -// Computes the repo name from the repo URL. Treats the last non empty word -// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git -// the name would be "cli.git" -func repoName(url string) string { - parts := strings.Split(strings.TrimRight(url, "/"), "/") - return parts[len(parts)-1] -} - -func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { - outputDir, err := filepath.Abs(outputDir) - if err != nil { - return nil, err - } - - // If the CLI is running on DBR and we're writing to the workspace file system, - // use the extension-aware workspace filesystem filer to instantiate the template. - // - // It is not possible to write notebooks through the workspace filesystem's FUSE mount. 
- // Therefore this is the only way we can initialize templates that contain notebooks - // when running the CLI on DBR and initializing a template to the workspace. - // - if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { - return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) - } - - return filer.NewLocalClient(outputDir) -} - func newInitCommand() *cobra.Command { cmd := &cobra.Command{ Use: "init [TEMPLATE_PATH]", @@ -182,7 +22,7 @@ TEMPLATE_PATH optionally specifies which template to use. It can be one of the f - a local file system path with a template directory - a Git repository URL, e.g. https://github.com/my/repository -See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, nativeTemplateHelpDescriptions()), +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()), } var configFile string @@ -202,88 +42,32 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return errors.New("only one of --tag or --branch can be specified") } - // Git ref to use for template initialization - ref := branch - if tag != "" { - ref = tag + var templatePathOrUrl string + if len(args) > 0 { + templatePathOrUrl = args[0] + } + r := template.Resolver{ + TemplatePathOrUrl: templatePathOrUrl, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: templateDir, + Tag: tag, + Branch: branch, } ctx := cmd.Context() - var templatePath string - if len(args) > 0 { - templatePath = args[0] - } else { - var err error - if !cmdio.IsPromptSupported(ctx) { - return errors.New("please specify a template") - } - description, err := cmdio.SelectOrdered(ctx, nativeTemplateOptions(), "Template to use") - if err != nil { - return err - } - templatePath = getNativeTemplateByDescription(description) - } - - outputFiler, err := constructOutputFiler(ctx, outputDir) - if err != nil { - return err - } - - if templatePath == customTemplate { + tmpl, err := r.Resolve(ctx) + if errors.Is(err, template.ErrCustomSelected) { cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.") cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.") return nil } - - // Expand templatePath to a git URL if it's an alias for a known native template - // and we know it's git URL. - if gitUrl := getUrlForNativeTemplate(templatePath); gitUrl != "" { - templatePath = gitUrl - } - - if !isRepoUrl(templatePath) { - if templateDir != "" { - return errors.New("--template-dir can only be used with a Git repository URL") - } - - templateFS, err := getFsForNativeTemplate(templatePath) - if err != nil { - return err - } - - // If this is not a built-in template, then it must be a local file system path. - if templateFS == nil { - templateFS = os.DirFS(templatePath) - } - - // skip downloading the repo because input arg is not a URL. We assume - // it's a path on the local file system in that case - return template.Materialize(ctx, configFile, templateFS, outputFiler) - } - - // Create a temporary directory with the name of the repository. The '*' - // character is replaced by a random string in the generated temporary directory. 
- repoDir, err := os.MkdirTemp("", repoName(templatePath)+"-*") if err != nil { return err } + defer tmpl.Reader.Cleanup(ctx) - // start the spinner - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "Downloading the template\n" - - // TODO: Add automated test that the downloaded git repo is cleaned up. - // Clone the repository in the temporary directory - err = git.Clone(ctx, templatePath, ref, repoDir) - close(promptSpinner) - if err != nil { - return err - } - - // Clean up downloaded repository once the template is materialized. - defer os.RemoveAll(repoDir) - templateFS := os.DirFS(filepath.Join(repoDir, templateDir)) - return template.Materialize(ctx, configFile, templateFS, outputFiler) + return tmpl.Writer.Materialize(ctx, tmpl.Reader) } return cmd } diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go index 5a26e1ea7..733758a8e 100644 --- a/cmd/bundle/open.go +++ b/cmd/bundle/open.go @@ -67,7 +67,7 @@ func newOpenCommand() *cobra.Command { return diags.Error() } - diags = bundle.Apply(ctx, b, phases.Initialize()) + diags = phases.Initialize(ctx, b) if err := diags.Error(); err != nil { return err } @@ -86,20 +86,20 @@ func newOpenCommand() *cobra.Command { noCache := errors.Is(stateFileErr, os.ErrNotExist) || errors.Is(configFileErr, os.ErrNotExist) if forcePull || noCache { - diags = bundle.Apply(ctx, b, bundle.Seq( + diags = bundle.ApplySeq(ctx, b, terraform.StatePull(), terraform.Interpolate(), terraform.Write(), - )) + ) if err := diags.Error(); err != nil { return err } } - diags = bundle.Apply(ctx, b, bundle.Seq( + diags = bundle.ApplySeq(ctx, b, terraform.Load(), mutator.InitializeURLs(), - )) + ) if err := diags.Error(); err != nil { return err } diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index df35d7222..574ad1016 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -111,7 +111,7 @@ task or a Python wheel task, the second example applies. return diags.Error() } - diags = bundle.Apply(ctx, b, phases.Initialize()) + diags = phases.Initialize(ctx, b) if err := diags.Error(); err != nil { return err } @@ -121,12 +121,12 @@ task or a Python wheel task, the second example applies. return err } - diags = bundle.Apply(ctx, b, bundle.Seq( + diags = bundle.ApplySeq(ctx, b, terraform.Interpolate(), terraform.Write(), terraform.StatePull(), terraform.Load(terraform.ErrorOnEmptyState), - )) + ) if err := diags.Error(); err != nil { return err } @@ -173,6 +173,7 @@ task or a Python wheel task, the second example applies. 
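The open.go and run.go hunks above (and the summary.go and validate.go hunks that follow) replace `bundle.Apply(ctx, b, bundle.Seq(...))` with the flatter `bundle.ApplySeq(ctx, b, ...)` and call phase helpers such as `phases.Initialize(ctx, b)` directly. A minimal sketch of the call-shape change in isolation, assuming `ApplySeq` applies the given mutators in order and accumulates their diagnostics as these call sites imply:

    // Previously the mutators were wrapped in an explicit sequence mutator:
    //
    //   diags = bundle.Apply(ctx, b, bundle.Seq(
    //       terraform.StatePull(),
    //       terraform.Interpolate(),
    //       terraform.Write(),
    //   ))
    //
    // With this change the wrapper is gone and the mutators are passed directly.
    diags = bundle.ApplySeq(ctx, b,
        terraform.StatePull(),
        terraform.Interpolate(),
        terraform.Write(),
    )
    if err := diags.Error(); err != nil {
        return err
    }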
if err != nil { return err } + _, _ = cmd.OutOrStdout().Write([]byte{'\n'}) default: return fmt.Errorf("unknown output type %s", root.OutputType(cmd)) } diff --git a/cmd/bundle/summary.go b/cmd/bundle/summary.go index 7c669c845..a0e93b78b 100644 --- a/cmd/bundle/summary.go +++ b/cmd/bundle/summary.go @@ -35,7 +35,7 @@ func newSummaryCommand() *cobra.Command { return diags.Error() } - diags = bundle.Apply(ctx, b, phases.Initialize()) + diags = phases.Initialize(ctx, b) if err := diags.Error(); err != nil { return err } @@ -49,18 +49,20 @@ func newSummaryCommand() *cobra.Command { noCache := errors.Is(stateFileErr, os.ErrNotExist) || errors.Is(configFileErr, os.ErrNotExist) if forcePull || noCache { - diags = bundle.Apply(ctx, b, bundle.Seq( + diags = bundle.ApplySeq(ctx, b, terraform.StatePull(), terraform.Interpolate(), terraform.Write(), - )) + ) if err := diags.Error(); err != nil { return err } } - diags = bundle.Apply(ctx, b, - bundle.Seq(terraform.Load(), mutator.InitializeURLs())) + diags = bundle.ApplySeq(ctx, b, + terraform.Load(), + mutator.InitializeURLs(), + ) if err := diags.Error(); err != nil { return err } @@ -74,6 +76,7 @@ func newSummaryCommand() *cobra.Command { return err } _, _ = cmd.OutOrStdout().Write(buf) + _, _ = cmd.OutOrStdout().Write([]byte{'\n'}) default: return fmt.Errorf("unknown output type %s", root.OutputType(cmd)) } diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index 274bba0e0..3ada07b74 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -71,7 +71,7 @@ func newSyncCommand() *cobra.Command { } // Run initialize phase to make sure paths are set. - diags = bundle.Apply(ctx, b, phases.Initialize()) + diags = phases.Initialize(ctx, b) if err := diags.Error(); err != nil { return err } diff --git a/cmd/bundle/validate.go b/cmd/bundle/validate.go index 41fa87f30..0ff9c7867 100644 --- a/cmd/bundle/validate.go +++ b/cmd/bundle/validate.go @@ -20,7 +20,9 @@ func renderJsonOutput(cmd *cobra.Command, b *bundle.Bundle) error { if err != nil { return err } - _, _ = cmd.OutOrStdout().Write(buf) + out := cmd.OutOrStdout() + _, _ = out.Write(buf) + _, _ = out.Write([]byte{'\n'}) return nil } @@ -44,7 +46,7 @@ func newValidateCommand() *cobra.Command { } if !diags.HasError() { - diags = diags.Extend(bundle.Apply(ctx, b, phases.Initialize())) + diags = diags.Extend(phases.Initialize(ctx, b)) } if !diags.HasError() { diff --git a/cmd/cmd.go b/cmd/cmd.go index 5b53a4ae5..4f5337fd3 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -12,10 +12,10 @@ import ( "github.com/databricks/cli/cmd/fs" "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/cmd/selftest" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/workspace" - "github.com/databricks/cli/cmd/workspace/apps" "github.com/spf13/cobra" ) @@ -68,7 +68,6 @@ func New(ctx context.Context) *cobra.Command { // Add other subcommands. 
cli.AddCommand(api.New()) - cli.AddCommand(apps.New()) cli.AddCommand(auth.New()) cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) @@ -76,6 +75,7 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(labs.New(ctx)) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) + cli.AddCommand(selftest.New()) return cli } diff --git a/cmd/labs/CODEOWNERS b/cmd/labs/CODEOWNERS deleted file mode 100644 index cc93a75e6..000000000 --- a/cmd/labs/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -* @nfx diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go index 7d31623bb..2e42ce43d 100644 --- a/cmd/labs/project/installer.go +++ b/cmd/labs/project/installer.go @@ -15,7 +15,6 @@ import ( "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/process" - "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/sql" @@ -33,6 +32,7 @@ type hook struct { RequireDatabricksConnect bool `yaml:"require_databricks_connect,omitempty"` MinRuntimeVersion string `yaml:"min_runtime_version,omitempty"` WarehouseTypes whTypes `yaml:"warehouse_types,omitempty"` + Extras string `yaml:"extras,omitempty"` } func (h *hook) RequireRunningCluster() bool { @@ -223,7 +223,7 @@ func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databr feedback := cmdio.Spinner(ctx) defer close(feedback) feedback <- "Detecting all installed Python interpreters on the system" - pythonInterpreters, err := python.DetectInterpreters(ctx) + pythonInterpreters, err := DetectInterpreters(ctx) if err != nil { return fmt.Errorf("detect: %w", err) } @@ -259,6 +259,10 @@ func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databr } } feedback <- "Installing Python library dependencies" + if i.Installer.Extras != "" { + // install main and optional dependencies + return i.installPythonDependencies(ctx, fmt.Sprintf(".[%s]", i.Installer.Extras)) + } return i.installPythonDependencies(ctx, ".") } diff --git a/libs/python/interpreters.go b/cmd/labs/project/interpreters.go similarity index 99% rename from libs/python/interpreters.go rename to cmd/labs/project/interpreters.go index 6071309a8..00f099ed4 100644 --- a/libs/python/interpreters.go +++ b/cmd/labs/project/interpreters.go @@ -1,4 +1,4 @@ -package python +package project import ( "context" diff --git a/libs/python/interpreters_unix_test.go b/cmd/labs/project/interpreters_unix_test.go similarity index 99% rename from libs/python/interpreters_unix_test.go rename to cmd/labs/project/interpreters_unix_test.go index 57adc9279..a5bbb6468 100644 --- a/libs/python/interpreters_unix_test.go +++ b/cmd/labs/project/interpreters_unix_test.go @@ -1,6 +1,6 @@ //go:build unix -package python +package project import ( "context" diff --git a/libs/python/interpreters_win_test.go b/cmd/labs/project/interpreters_win_test.go similarity index 97% rename from libs/python/interpreters_win_test.go rename to cmd/labs/project/interpreters_win_test.go index f99981529..2316daa30 100644 --- a/libs/python/interpreters_win_test.go +++ b/cmd/labs/project/interpreters_win_test.go @@ -1,6 +1,6 @@ //go:build windows -package python +package project import ( "context" diff --git a/cmd/labs/project/schema.json b/cmd/labs/project/schema.json index a779b15e4..7aa65813c 100644 --- a/cmd/labs/project/schema.json +++ b/cmd/labs/project/schema.json @@ -42,6 +42,11 @@ }, 
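The installer.go hunk above adds an optional `extras` hook field that is folded into the pip install target (`.[<extras>]`); the schema change that follows constrains it to a comma-separated list. A small self-contained sketch of the resulting target string, using "cli,tests" purely as an illustrative value:

    package main

    import "fmt"

    func main() {
        // Illustrative value of the new `extras` field from labs.yml; an empty
        // string means only the main dependencies are installed.
        extras := "cli,tests"

        target := "." // install the project itself
        if extras != "" {
            // pip extras syntax: the project plus the named optional dependency groups.
            target = fmt.Sprintf(".[%s]", extras)
        }
        fmt.Println(target) // Output: .[cli,tests]
    }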
"warehouse_types": { "enum": [ "PRO", "CLASSIC", "TYPE_UNSPECIFIED" ] + }, + "extras": { + "type": "string", + "pattern": "^([^,]+)(,([^,]+))*$", + "default": "" } } }, diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py index 6873257d5..a162da342 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -1 +1 @@ -print(f'setting up important infrastructure') +print(f"setting up important infrastructure") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml index 0ac4bf826..b8a0e695e 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml @@ -8,6 +8,7 @@ install: warehouse_types: - PRO script: install.py + extras: "" entrypoint: main.py min_python: 3.9 commands: diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py index 769ee73ee..e5866d6ae 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -2,26 +2,34 @@ import os, sys, json payload = json.loads(sys.argv[1]) -if 'echo' == payload['command']: - json.dump({ - 'command': payload['command'], - 'flags': payload['flags'], - 'env': {k:v for k,v in os.environ.items()} - }, sys.stdout) +if "echo" == payload["command"]: + json.dump( + { + "command": payload["command"], + "flags": payload["flags"], + "env": {k: v for k, v in os.environ.items()}, + }, + sys.stdout, + ) sys.exit(0) -if 'table' == payload['command']: +if "table" == payload["command"]: sys.stderr.write("some intermediate info\n") - json.dump({'records': [ - {'key': 'First', 'value': 'Second'}, - {'key': 'Third', 'value': 'Fourth'}, - ]}, sys.stdout) + json.dump( + { + "records": [ + {"key": "First", "value": "Second"}, + {"key": "Third", "value": "Fourth"}, + ] + }, + sys.stdout, + ) sys.exit(0) -print(f'host is {os.environ["DATABRICKS_HOST"]}') +print(f"host is {os.environ['DATABRICKS_HOST']}") -print(f'[{payload["command"]}] command flags are {payload["flags"]}') +print(f"[{payload['command']}] command flags are {payload['flags']}") -answer = input('What is your name? ') +answer = input("What is your name? 
") -print(f'Hello, {answer}!') +print(f"Hello, {answer}!") diff --git a/libs/python/testdata/other-binaries-filtered/python b/cmd/labs/project/testdata/other-binaries-filtered/python similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python rename to cmd/labs/project/testdata/other-binaries-filtered/python diff --git a/libs/python/testdata/other-binaries-filtered/python3-whatever b/cmd/labs/project/testdata/other-binaries-filtered/python3-whatever similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python3-whatever rename to cmd/labs/project/testdata/other-binaries-filtered/python3-whatever diff --git a/libs/python/testdata/other-binaries-filtered/python3.10 b/cmd/labs/project/testdata/other-binaries-filtered/python3.10 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python3.10 rename to cmd/labs/project/testdata/other-binaries-filtered/python3.10 diff --git a/libs/python/testdata/other-binaries-filtered/python3.10.100 b/cmd/labs/project/testdata/other-binaries-filtered/python3.10.100 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python3.10.100 rename to cmd/labs/project/testdata/other-binaries-filtered/python3.10.100 diff --git a/libs/python/testdata/other-binaries-filtered/python3.11 b/cmd/labs/project/testdata/other-binaries-filtered/python3.11 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python3.11 rename to cmd/labs/project/testdata/other-binaries-filtered/python3.11 diff --git a/libs/python/testdata/other-binaries-filtered/python4.8 b/cmd/labs/project/testdata/other-binaries-filtered/python4.8 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python4.8 rename to cmd/labs/project/testdata/other-binaries-filtered/python4.8 diff --git a/libs/python/testdata/other-binaries-filtered/python5 b/cmd/labs/project/testdata/other-binaries-filtered/python5 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python5 rename to cmd/labs/project/testdata/other-binaries-filtered/python5 diff --git a/libs/python/testdata/other-binaries-filtered/python6 b/cmd/labs/project/testdata/other-binaries-filtered/python6 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python6 rename to cmd/labs/project/testdata/other-binaries-filtered/python6 diff --git a/libs/python/testdata/other-binaries-filtered/python7 b/cmd/labs/project/testdata/other-binaries-filtered/python7 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/python7 rename to cmd/labs/project/testdata/other-binaries-filtered/python7 diff --git a/libs/python/testdata/other-binaries-filtered/pythonw b/cmd/labs/project/testdata/other-binaries-filtered/pythonw similarity index 100% rename from libs/python/testdata/other-binaries-filtered/pythonw rename to cmd/labs/project/testdata/other-binaries-filtered/pythonw diff --git a/libs/python/testdata/other-binaries-filtered/real-python3.11.4 b/cmd/labs/project/testdata/other-binaries-filtered/real-python3.11.4 similarity index 100% rename from libs/python/testdata/other-binaries-filtered/real-python3.11.4 rename to cmd/labs/project/testdata/other-binaries-filtered/real-python3.11.4 diff --git a/libs/python/testdata/other-binaries-filtered/whatever b/cmd/labs/project/testdata/other-binaries-filtered/whatever similarity index 100% rename from libs/python/testdata/other-binaries-filtered/whatever rename to 
cmd/labs/project/testdata/other-binaries-filtered/whatever diff --git a/libs/python/testdata/world-writeable/python8.4 b/cmd/labs/project/testdata/world-writeable/python8.4 similarity index 100% rename from libs/python/testdata/world-writeable/python8.4 rename to cmd/labs/project/testdata/world-writeable/python8.4 diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 49abfd414..e2dac68cc 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -195,6 +195,12 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error { cfg.Profile = profile } + _, isTargetFlagSet := targetFlagValue(cmd) + // If the profile flag is set but the target flag is not, we should skip loading the bundle configuration. + if !isTargetFlagSet && hasProfileFlag { + cmd.SetContext(SkipLoadBundle(cmd.Context())) + } + ctx := cmd.Context() ctx = context.WithValue(ctx, &configUsed, cfg) cmd.SetContext(ctx) @@ -209,7 +215,7 @@ func MustWorkspaceClient(cmd *cobra.Command, args []string) error { if b != nil { ctx = context.WithValue(ctx, &configUsed, b.Config.Workspace.Config()) cmd.SetContext(ctx) - client, err := b.InitializeWorkspaceClient() + client, err := b.WorkspaceClientE() if err != nil { return err } diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index 8b98f2cf2..b40803707 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -14,26 +14,35 @@ import ( // getTarget returns the name of the target to operate in. func getTarget(cmd *cobra.Command) (value string) { + target, isFlagSet := targetFlagValue(cmd) + if isFlagSet { + return target + } + + // If it's not set, use the environment variable. + target, _ = env.Target(cmd.Context()) + return target +} + +func targetFlagValue(cmd *cobra.Command) (string, bool) { // The command line flag takes precedence. flag := cmd.Flag("target") if flag != nil { - value = flag.Value.String() + value := flag.Value.String() if value != "" { - return + return value, true } } oldFlag := cmd.Flag("environment") if oldFlag != nil { - value = oldFlag.Value.String() + value := oldFlag.Value.String() if value != "" { - return + return value, true } } - // If it's not set, use the environment variable. - target, _ := env.Target(cmd.Context()) - return target + return "", false } func getProfile(cmd *cobra.Command) (value string) { @@ -65,22 +74,37 @@ func configureProfile(cmd *cobra.Command, b *bundle.Bundle) diag.Diagnostics { // configureBundle loads the bundle configuration and configures flag values, if any. func configureBundle(cmd *cobra.Command, b *bundle.Bundle) (*bundle.Bundle, diag.Diagnostics) { - var m bundle.Mutator - if target := getTarget(cmd); target == "" { - m = phases.LoadDefaultTarget() - } else { - m = phases.LoadNamedTarget(target) - } - // Load bundle and select target. ctx := cmd.Context() - diags := bundle.Apply(ctx, b, m) + var diags diag.Diagnostics + if target := getTarget(cmd); target == "" { + diags = phases.LoadDefaultTarget(ctx, b) + } else { + diags = phases.LoadNamedTarget(ctx, b, target) + } + if diags.HasError() { return b, diags } // Configure the workspace profile if the flag has been set. diags = diags.Extend(configureProfile(cmd, b)) + if diags.HasError() { + return b, diags + } + + // Set the auth configuration in the command context. This can be used + // downstream to initialize a API client. + // + // Note that just initializing a workspace client and loading auth configuration + // is a fast operation. It does not perform network I/O or invoke processes (for example the Azure CLI). 
+ client, err := b.WorkspaceClientE() + if err != nil { + return b, diags.Extend(diag.FromErr(err)) + } + ctx = context.WithValue(ctx, &configUsed, client.Config) + cmd.SetContext(ctx) + return b, diags } @@ -134,7 +158,7 @@ func targetCompletion(cmd *cobra.Command, args []string, toComplete string) ([]s } // Load bundle but don't select a target (we're completing those). - diags := bundle.Apply(ctx, b, phases.Load()) + diags := phases.Load(ctx, b) if err := diags.Error(); err != nil { cobra.CompErrorln(err.Error()) return nil, cobra.ShellCompDirectiveError diff --git a/cmd/root/bundle_test.go b/cmd/root/bundle_test.go index 1998b19e6..3517b02e4 100644 --- a/cmd/root/bundle_test.go +++ b/cmd/root/bundle_test.go @@ -8,7 +8,6 @@ import ( "runtime" "testing" - "github.com/databricks/cli/bundle" "github.com/databricks/cli/internal/testutil" "github.com/spf13/cobra" "github.com/stretchr/testify/assert" @@ -38,7 +37,7 @@ func emptyCommand(t *testing.T) *cobra.Command { return cmd } -func setupWithHost(t *testing.T, cmd *cobra.Command, host string) *bundle.Bundle { +func setupWithHost(t *testing.T, cmd *cobra.Command, host string) error { setupDatabricksCfg(t) rootPath := t.TempDir() @@ -51,12 +50,11 @@ workspace: err := os.WriteFile(filepath.Join(rootPath, "databricks.yml"), []byte(contents), 0o644) require.NoError(t, err) - b, diags := MustConfigureBundle(cmd) - require.NoError(t, diags.Error()) - return b + _, diags := MustConfigureBundle(cmd) + return diags.Error() } -func setupWithProfile(t *testing.T, cmd *cobra.Command, profile string) *bundle.Bundle { +func setupWithProfile(t *testing.T, cmd *cobra.Command, profile string) error { setupDatabricksCfg(t) rootPath := t.TempDir() @@ -69,29 +67,25 @@ workspace: err := os.WriteFile(filepath.Join(rootPath, "databricks.yml"), []byte(contents), 0o644) require.NoError(t, err) - b, diags := MustConfigureBundle(cmd) - require.NoError(t, diags.Error()) - return b + _, diags := MustConfigureBundle(cmd) + return diags.Error() } func TestBundleConfigureDefault(t *testing.T) { testutil.CleanupEnvironment(t) cmd := emptyCommand(t) - b := setupWithHost(t, cmd, "https://x.com") - - client, err := b.InitializeWorkspaceClient() + err := setupWithHost(t, cmd, "https://x.com") require.NoError(t, err) - assert.Equal(t, "https://x.com", client.Config.Host) + + assert.Equal(t, "https://x.com", ConfigUsed(cmd.Context()).Host) } func TestBundleConfigureWithMultipleMatches(t *testing.T) { testutil.CleanupEnvironment(t) cmd := emptyCommand(t) - b := setupWithHost(t, cmd, "https://a.com") - - _, err := b.InitializeWorkspaceClient() + err := setupWithHost(t, cmd, "https://a.com") assert.ErrorContains(t, err, "multiple profiles matched: PROFILE-1, PROFILE-2") } @@ -101,9 +95,8 @@ func TestBundleConfigureWithNonExistentProfileFlag(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("NOEXIST") require.NoError(t, err) - b := setupWithHost(t, cmd, "https://x.com") - _, err = b.InitializeWorkspaceClient() + err = setupWithHost(t, cmd, "https://x.com") assert.ErrorContains(t, err, "has no NOEXIST profile configured") } @@ -113,9 +106,8 @@ func TestBundleConfigureWithMismatchedProfile(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("PROFILE-1") require.NoError(t, err) - b := setupWithHost(t, cmd, "https://x.com") - _, err = b.InitializeWorkspaceClient() + err = setupWithHost(t, cmd, "https://x.com") assert.ErrorContains(t, err, "config host mismatch: profile uses host https://a.com, but CLI configured to use https://x.com") } 
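These bundle_test.go hunks stop building a workspace client per test and instead assert against the auth configuration that `configureBundle` now places in the command context. A sketch of the consumption pattern, assuming `ConfigUsed` returns the stored config as the assertions suggest:

    // After MustConfigureBundle has loaded the bundle and resolved the profile,
    // the effective auth configuration can be read back from the context
    // without constructing another workspace client.
    _, diags := MustConfigureBundle(cmd)
    if err := diags.Error(); err != nil {
        return err
    }

    cfg := ConfigUsed(cmd.Context())
    // cfg.Host, cfg.Token and cfg.Profile reflect the selected profile,
    // e.g. cfg.Profile == "PROFILE-1" in the happy-path tests above.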
@@ -125,12 +117,11 @@ func TestBundleConfigureWithCorrectProfile(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("PROFILE-1") require.NoError(t, err) - b := setupWithHost(t, cmd, "https://a.com") + err = setupWithHost(t, cmd, "https://a.com") - client, err := b.InitializeWorkspaceClient() require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "PROFILE-1", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "PROFILE-1", ConfigUsed(cmd.Context()).Profile) } func TestBundleConfigureWithMismatchedProfileEnvVariable(t *testing.T) { @@ -138,9 +129,8 @@ func TestBundleConfigureWithMismatchedProfileEnvVariable(t *testing.T) { t.Setenv("DATABRICKS_CONFIG_PROFILE", "PROFILE-1") cmd := emptyCommand(t) - b := setupWithHost(t, cmd, "https://x.com") - _, err := b.InitializeWorkspaceClient() + err := setupWithHost(t, cmd, "https://x.com") assert.ErrorContains(t, err, "config host mismatch: profile uses host https://a.com, but CLI configured to use https://x.com") } @@ -151,12 +141,11 @@ func TestBundleConfigureWithProfileFlagAndEnvVariable(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("PROFILE-1") require.NoError(t, err) - b := setupWithHost(t, cmd, "https://a.com") - client, err := b.InitializeWorkspaceClient() + err = setupWithHost(t, cmd, "https://a.com") require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "PROFILE-1", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "PROFILE-1", ConfigUsed(cmd.Context()).Profile) } func TestBundleConfigureProfileDefault(t *testing.T) { @@ -164,13 +153,12 @@ func TestBundleConfigureProfileDefault(t *testing.T) { // The profile in the databricks.yml file is used cmd := emptyCommand(t) - b := setupWithProfile(t, cmd, "PROFILE-1") - client, err := b.InitializeWorkspaceClient() + err := setupWithProfile(t, cmd, "PROFILE-1") require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "a", client.Config.Token) - assert.Equal(t, "PROFILE-1", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "a", ConfigUsed(cmd.Context()).Token) + assert.Equal(t, "PROFILE-1", ConfigUsed(cmd.Context()).Profile) } func TestBundleConfigureProfileFlag(t *testing.T) { @@ -180,13 +168,12 @@ func TestBundleConfigureProfileFlag(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("PROFILE-2") require.NoError(t, err) - b := setupWithProfile(t, cmd, "PROFILE-1") - client, err := b.InitializeWorkspaceClient() + err = setupWithProfile(t, cmd, "PROFILE-1") require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "b", client.Config.Token) - assert.Equal(t, "PROFILE-2", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "b", ConfigUsed(cmd.Context()).Token) + assert.Equal(t, "PROFILE-2", ConfigUsed(cmd.Context()).Profile) } func TestBundleConfigureProfileEnvVariable(t *testing.T) { @@ -195,13 +182,12 @@ func TestBundleConfigureProfileEnvVariable(t *testing.T) { // The DATABRICKS_CONFIG_PROFILE environment variable takes precedence over the profile in the databricks.yml file t.Setenv("DATABRICKS_CONFIG_PROFILE", "PROFILE-2") cmd := emptyCommand(t) - b := setupWithProfile(t, cmd, "PROFILE-1") - client, err := b.InitializeWorkspaceClient() + 
err := setupWithProfile(t, cmd, "PROFILE-1") require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "b", client.Config.Token) - assert.Equal(t, "PROFILE-2", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "b", ConfigUsed(cmd.Context()).Token) + assert.Equal(t, "PROFILE-2", ConfigUsed(cmd.Context()).Profile) } func TestBundleConfigureProfileFlagAndEnvVariable(t *testing.T) { @@ -212,13 +198,12 @@ func TestBundleConfigureProfileFlagAndEnvVariable(t *testing.T) { cmd := emptyCommand(t) err := cmd.Flag("profile").Value.Set("PROFILE-2") require.NoError(t, err) - b := setupWithProfile(t, cmd, "PROFILE-1") - client, err := b.InitializeWorkspaceClient() + err = setupWithProfile(t, cmd, "PROFILE-1") require.NoError(t, err) - assert.Equal(t, "https://a.com", client.Config.Host) - assert.Equal(t, "b", client.Config.Token) - assert.Equal(t, "PROFILE-2", client.Config.Profile) + assert.Equal(t, "https://a.com", ConfigUsed(cmd.Context()).Host) + assert.Equal(t, "b", ConfigUsed(cmd.Context()).Token) + assert.Equal(t, "PROFILE-2", ConfigUsed(cmd.Context()).Profile) } func TestTargetFlagFull(t *testing.T) { diff --git a/cmd/root/root.go b/cmd/root/root.go index 3b37d0176..04815f48b 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -6,6 +6,7 @@ import ( "fmt" "log/slog" "os" + "runtime/debug" "strings" "github.com/databricks/cli/internal/build" @@ -96,11 +97,35 @@ func flagErrorFunc(c *cobra.Command, err error) error { // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. -func Execute(ctx context.Context, cmd *cobra.Command) error { - // TODO: deferred panic recovery +func Execute(ctx context.Context, cmd *cobra.Command) (err error) { + defer func() { + r := recover() + + // No panic. Return normally. + if r == nil { + return + } + + version := build.GetInfo().Version + trace := debug.Stack() + + // Set the error so that the CLI exits with a non-zero exit code. + err = fmt.Errorf("panic: %v", r) + + fmt.Fprintf(cmd.ErrOrStderr(), `The Databricks CLI unexpectedly had a fatal error. 
+Please report this issue to Databricks in the form of a GitHub issue at: +https://github.com/databricks/cli + +CLI Version: %s + +Panic Payload: %v + +Stack Trace: +%s`, version, r, string(trace)) + }() // Run the command - cmd, err := cmd.ExecuteContextC(ctx) + cmd, err = cmd.ExecuteContextC(ctx) if err != nil && !errors.Is(err, ErrAlreadyPrinted) { // If cmdio logger initialization succeeds, then this function logs with the // initialized cmdio logger, otherwise with the default cmdio logger @@ -114,10 +139,15 @@ func Execute(ctx context.Context, cmd *cobra.Command) error { if err == nil { logger.Info("completed execution", slog.String("exit_code", "0")) - } else { - logger.Error("failed execution", + } else if errors.Is(err, ErrAlreadyPrinted) { + logger.Debug("failed execution", slog.String("exit_code", "1"), - slog.String("error", err.Error())) + ) + } else { + logger.Info("failed execution", + slog.String("exit_code", "1"), + slog.String("error", err.Error()), + ) } } diff --git a/cmd/selftest/panic.go b/cmd/selftest/panic.go new file mode 100644 index 000000000..58d8b24e5 --- /dev/null +++ b/cmd/selftest/panic.go @@ -0,0 +1,12 @@ +package selftest + +import "github.com/spf13/cobra" + +func newPanic() *cobra.Command { + return &cobra.Command{ + Use: "panic", + Run: func(cmd *cobra.Command, args []string) { + panic("the databricks selftest panic command always panics") + }, + } +} diff --git a/cmd/selftest/selftest.go b/cmd/selftest/selftest.go new file mode 100644 index 000000000..7d8cfcb76 --- /dev/null +++ b/cmd/selftest/selftest.go @@ -0,0 +1,16 @@ +package selftest + +import ( + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "selftest", + Short: "Non functional CLI commands that are useful for testing", + Hidden: true, + } + + cmd.AddCommand(newPanic()) + return cmd +} diff --git a/cmd/workspace/access-control/access-control.go b/cmd/workspace/access-control/access-control.go new file mode 100755 index 000000000..7668265fb --- /dev/null +++ b/cmd/workspace/access-control/access-control.go @@ -0,0 +1,109 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package access_control + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "access-control", + Short: `Rule based Access Control for Databricks Resources.`, + Long: `Rule based Access Control for Databricks Resources.`, + GroupID: "iam", + Annotations: map[string]string{ + "package": "iam", + }, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newCheckPolicy()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start check-policy command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
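The cmd/root/root.go hunk above makes Execute recover panics, report the CLI version, payload, and stack trace, and still exit non-zero, while the new hidden `databricks selftest panic` command exists to exercise that path manually. A minimal standalone sketch of the same recover-and-report pattern (everything outside recover and debug.Stack is illustrative):

    package main

    import (
        "fmt"
        "os"
        "runtime/debug"
    )

    func run() (err error) {
        defer func() {
            r := recover()
            if r == nil {
                return
            }
            // Turn the panic into an error so the process exits with a non-zero
            // code, and print the payload plus a stack trace for the bug report.
            err = fmt.Errorf("panic: %v", r)
            fmt.Fprintf(os.Stderr, "Panic Payload: %v\n\nStack Trace:\n%s\n", r, debug.Stack())
        }()
        panic("the databricks selftest panic command always panics")
    }

    func main() {
        if err := run(); err != nil {
            os.Exit(1)
        }
    }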
+var checkPolicyOverrides []func( + *cobra.Command, + *iam.CheckPolicyRequest, +) + +func newCheckPolicy() *cobra.Command { + cmd := &cobra.Command{} + + var checkPolicyReq iam.CheckPolicyRequest + var checkPolicyJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&checkPolicyJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + // TODO: complex arg: resource_info + + cmd.Use = "check-policy" + cmd.Short = `Check access policy to a resource.` + cmd.Long = `Check access policy to a resource.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + diags := checkPolicyJson.Unmarshal(&checkPolicyReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } else { + return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + + response, err := w.AccessControl.CheckPolicy(ctx, checkPolicyReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range checkPolicyOverrides { + fn(cmd, &checkPolicyReq) + } + + return cmd +} + +// end service AccessControl diff --git a/cmd/workspace/alerts/alerts.go b/cmd/workspace/alerts/alerts.go index fcf18652b..79467c405 100755 --- a/cmd/workspace/alerts/alerts.go +++ b/cmd/workspace/alerts/alerts.go @@ -335,10 +335,17 @@ func newUpdate() *cobra.Command { Arguments: ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + UPDATE_MASK: The field mask must be a single string, with multiple fields separated by + commas (no spaces). The field path is relative to the resource object, + using a dot (.) to navigate sub-fields (e.g., author.given_name). + Specification of elements in sequence or map fields is not allowed, as + only the entire collection field can be specified. Field names must + exactly match the resource field names. + + A field mask of * indicates full replacement. 
It’s recommended to + always explicitly list the fields being updated and avoid using * + wildcards, as it can lead to unintended results if the API changes in the + future.` cmd.Annotations = make(map[string]string) diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index a103ba7a8..6eb85d873 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -78,6 +78,7 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().BoolVar(&createReq.NoCompute, "no-compute", createReq.NoCompute, `If true, the app will not be started after creation.`) // TODO: complex arg: active_deployment // TODO: complex arg: app_status // TODO: complex arg: compute_status @@ -955,13 +956,6 @@ func newUpdate() *cobra.Command { cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - if cmd.Flags().Changed("json") { - err := root.ExactArgs(0)(cmd, args) - if err != nil { - return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'name' in your JSON input") - } - return nil - } check := root.ExactArgs(1) return check(cmd, args) } diff --git a/cmd/workspace/apps/overrides.go b/cmd/workspace/apps/overrides.go new file mode 100644 index 000000000..e14068717 --- /dev/null +++ b/cmd/workspace/apps/overrides.go @@ -0,0 +1,28 @@ +package apps + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func listOverride(listCmd *cobra.Command, listReq *apps.ListAppsRequest) { + listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "Name"}} {{header "Url"}} {{header "ComputeStatus"}} {{header "DeploymentStatus"}}`) + listCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.Name | green}} {{.Url}} {{if .ComputeStatus}}{{if eq .ComputeStatus.State "ACTIVE"}}{{green "%s" .ComputeStatus.State }}{{else}}{{blue "%s" .ComputeStatus.State}}{{end}}{{end}} {{if .ActiveDeployment}}{{if eq .ActiveDeployment.Status.State "SUCCEEDED"}}{{green "%s" .ActiveDeployment.Status.State }}{{else}}{{blue "%s" .ActiveDeployment.Status.State}}{{end}}{{end}} + {{end}}`) +} + +func listDeploymentsOverride(listDeploymentsCmd *cobra.Command, listDeploymentsReq *apps.ListAppDeploymentsRequest) { + listDeploymentsCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "DeploymentId"}} {{header "State"}} {{header "CreatedAt"}}`) + listDeploymentsCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.DeploymentId}} {{if eq .Status.State "SUCCEEDED"}}{{green "%s" .Status.State }}{{else}}{{blue "%s" .Status.State}}{{end}} {{.CreateTime}} + {{end}}`) +} + +func init() { + listOverrides = append(listOverrides, listOverride) + listDeploymentsOverrides = append(listDeploymentsOverrides, listDeploymentsOverride) +} diff --git a/cmd/workspace/catalogs/catalogs.go b/cmd/workspace/catalogs/catalogs.go index 9294c192b..ce37b6d54 100755 --- a/cmd/workspace/catalogs/catalogs.go +++ b/cmd/workspace/catalogs/catalogs.go @@ -342,6 +342,7 @@ func newUpdate() *cobra.Command { cmd.Flags().Var(&updateReq.EnablePredictiveOptimization, "enable-predictive-optimization", `Whether predictive optimization should be enabled for this object and objects under it. 
Supported values: [DISABLE, ENABLE, INHERIT]`) cmd.Flags().Var(&updateReq.IsolationMode, "isolation-mode", `Whether the current securable is accessible from all workspaces or a specific set of workspaces. Supported values: [ISOLATED, OPEN]`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the catalog.`) + // TODO: map via StringToStringVar: options cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of current owner of catalog.`) // TODO: map via StringToStringVar: properties diff --git a/cmd/workspace/clean-rooms/clean-rooms.go b/cmd/workspace/clean-rooms/clean-rooms.go index 053e41e8a..4fe61d56b 100755 --- a/cmd/workspace/clean-rooms/clean-rooms.go +++ b/cmd/workspace/clean-rooms/clean-rooms.go @@ -75,8 +75,9 @@ func newCreate() *cobra.Command { Create a new clean room with the specified collaborators. This method is asynchronous; the returned name field inside the clean_room field can be used to poll the clean room status, using the :method:cleanrooms/get method. When - this method returns, the cluster will be in a PROVISIONING state. The cluster - will be usable once it enters an ACTIVE state. + this method returns, the clean room will be in a PROVISIONING state, with only + name, owner, comment, created_at and status populated. The clean room will be + usable once it enters an ACTIVE state. The caller must be a metastore admin or have the **CREATE_CLEAN_ROOM** privilege on the metastore.` diff --git a/cmd/workspace/cmd.go b/cmd/workspace/cmd.go index f07d0cf76..2bd3c59a5 100755 --- a/cmd/workspace/cmd.go +++ b/cmd/workspace/cmd.go @@ -3,6 +3,7 @@ package workspace import ( + access_control "github.com/databricks/cli/cmd/workspace/access-control" alerts "github.com/databricks/cli/cmd/workspace/alerts" alerts_legacy "github.com/databricks/cli/cmd/workspace/alerts-legacy" apps "github.com/databricks/cli/cmd/workspace/apps" @@ -38,6 +39,7 @@ import ( ip_access_lists "github.com/databricks/cli/cmd/workspace/ip-access-lists" jobs "github.com/databricks/cli/cmd/workspace/jobs" lakeview "github.com/databricks/cli/cmd/workspace/lakeview" + lakeview_embedded "github.com/databricks/cli/cmd/workspace/lakeview-embedded" libraries "github.com/databricks/cli/cmd/workspace/libraries" metastores "github.com/databricks/cli/cmd/workspace/metastores" model_registry "github.com/databricks/cli/cmd/workspace/model-registry" @@ -61,11 +63,13 @@ import ( quality_monitors "github.com/databricks/cli/cmd/workspace/quality-monitors" queries "github.com/databricks/cli/cmd/workspace/queries" queries_legacy "github.com/databricks/cli/cmd/workspace/queries-legacy" + query_execution "github.com/databricks/cli/cmd/workspace/query-execution" query_history "github.com/databricks/cli/cmd/workspace/query-history" query_visualizations "github.com/databricks/cli/cmd/workspace/query-visualizations" query_visualizations_legacy "github.com/databricks/cli/cmd/workspace/query-visualizations-legacy" recipient_activation "github.com/databricks/cli/cmd/workspace/recipient-activation" recipients "github.com/databricks/cli/cmd/workspace/recipients" + redash_config "github.com/databricks/cli/cmd/workspace/redash-config" registered_models "github.com/databricks/cli/cmd/workspace/registered-models" repos "github.com/databricks/cli/cmd/workspace/repos" resource_quotas "github.com/databricks/cli/cmd/workspace/resource-quotas" @@ -96,6 +100,7 @@ import ( func All() []*cobra.Command { var out []*cobra.Command + out = append(out, access_control.New()) out = append(out, 
alerts.New()) out = append(out, alerts_legacy.New()) out = append(out, apps.New()) @@ -131,6 +136,7 @@ func All() []*cobra.Command { out = append(out, ip_access_lists.New()) out = append(out, jobs.New()) out = append(out, lakeview.New()) + out = append(out, lakeview_embedded.New()) out = append(out, libraries.New()) out = append(out, metastores.New()) out = append(out, model_registry.New()) @@ -154,11 +160,13 @@ func All() []*cobra.Command { out = append(out, quality_monitors.New()) out = append(out, queries.New()) out = append(out, queries_legacy.New()) + out = append(out, query_execution.New()) out = append(out, query_history.New()) out = append(out, query_visualizations.New()) out = append(out, query_visualizations_legacy.New()) out = append(out, recipient_activation.New()) out = append(out, recipients.New()) + out = append(out, redash_config.New()) out = append(out, registered_models.New()) out = append(out, repos.New()) out = append(out, resource_quotas.New()) diff --git a/cmd/workspace/genie/genie.go b/cmd/workspace/genie/genie.go index 25fa9396d..99841637a 100755 --- a/cmd/workspace/genie/genie.go +++ b/cmd/workspace/genie/genie.go @@ -40,6 +40,7 @@ func New() *cobra.Command { cmd.AddCommand(newExecuteMessageQuery()) cmd.AddCommand(newGetMessage()) cmd.AddCommand(newGetMessageQueryResult()) + cmd.AddCommand(newGetMessageQueryResultByAttachment()) cmd.AddCommand(newStartConversation()) // Apply optional overrides to this command. @@ -344,6 +345,71 @@ func newGetMessageQueryResult() *cobra.Command { return cmd } +// start get-message-query-result-by-attachment command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getMessageQueryResultByAttachmentOverrides []func( + *cobra.Command, + *dashboards.GenieGetQueryResultByAttachmentRequest, +) + +func newGetMessageQueryResultByAttachment() *cobra.Command { + cmd := &cobra.Command{} + + var getMessageQueryResultByAttachmentReq dashboards.GenieGetQueryResultByAttachmentRequest + + // TODO: short flags + + cmd.Use = "get-message-query-result-by-attachment SPACE_ID CONVERSATION_ID MESSAGE_ID ATTACHMENT_ID" + cmd.Short = `Get conversation message SQL query result by attachment id.` + cmd.Long = `Get conversation message SQL query result by attachment id. + + Get the result of SQL query by attachment id This is only available if a + message has a query attachment and the message status is EXECUTING_QUERY. + + Arguments: + SPACE_ID: Genie space ID + CONVERSATION_ID: Conversation ID + MESSAGE_ID: Message ID + ATTACHMENT_ID: Attachment ID` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(4) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + getMessageQueryResultByAttachmentReq.SpaceId = args[0] + getMessageQueryResultByAttachmentReq.ConversationId = args[1] + getMessageQueryResultByAttachmentReq.MessageId = args[2] + getMessageQueryResultByAttachmentReq.AttachmentId = args[3] + + response, err := w.Genie.GetMessageQueryResultByAttachment(ctx, getMessageQueryResultByAttachmentReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. 
+ cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getMessageQueryResultByAttachmentOverrides { + fn(cmd, &getMessageQueryResultByAttachmentReq) + } + + return cmd +} + // start start-conversation command // Slice with functions to override default command behavior. diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index b067937e2..0f911d400 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -625,11 +625,19 @@ func newGet() *cobra.Command { // TODO: short flags + cmd.Flags().StringVar(&getReq.PageToken, "page-token", getReq.PageToken, `Use next_page_token returned from the previous GetJob to request the next page of the job's sub-resources.`) + cmd.Use = "get JOB_ID" cmd.Short = `Get a single job.` cmd.Long = `Get a single job. Retrieves the details for a single job. + + In Jobs API 2.2, requests for a single job support pagination of tasks and + job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: JOB_ID: The canonical identifier of the job to retrieve information about. This @@ -847,13 +855,19 @@ func newGetRun() *cobra.Command { cmd.Flags().BoolVar(&getRunReq.IncludeHistory, "include-history", getRunReq.IncludeHistory, `Whether to include the repair history in the response.`) cmd.Flags().BoolVar(&getRunReq.IncludeResolvedValues, "include-resolved-values", getRunReq.IncludeResolvedValues, `Whether to include resolved parameter values in the response.`) - cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `To list the next page of job tasks, set this field to the value of the next_page_token returned in the GetJob response.`) + cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `Use next_page_token returned from the previous GetRun to request the next page of the run's sub-resources.`) cmd.Use = "get-run RUN_ID" cmd.Short = `Get a single job run.` cmd.Long = `Get a single job run. - Retrieve the metadata of a run. + Retrieves the metadata of a run. + + In Jobs API 2.2, requests for a single job run support pagination of tasks + and job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: RUN_ID: The canonical identifier of the run for which to retrieve the metadata. @@ -1340,6 +1354,7 @@ func newRunNow() *cobra.Command { // TODO: map via StringToStringVar: job_parameters // TODO: map via StringToStringVar: notebook_params // TODO: array: only + cmd.Flags().Var(&runNowReq.PerformanceTarget, "performance-target", `PerformanceTarget defines how performant or cost efficient the execution of run on serverless compute should be. Supported values: [COST_OPTIMIZED, PERFORMANCE_OPTIMIZED]`) // TODO: complex arg: pipeline_params // TODO: map via StringToStringVar: python_named_params // TODO: array: python_params diff --git a/cmd/workspace/lakeview-embedded/lakeview-embedded.go b/cmd/workspace/lakeview-embedded/lakeview-embedded.go new file mode 100755 index 000000000..ef04c2c13 --- /dev/null +++ b/cmd/workspace/lakeview-embedded/lakeview-embedded.go @@ -0,0 +1,98 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. 
DO NOT EDIT. + +package lakeview_embedded + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/databricks-sdk-go/service/dashboards" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "lakeview-embedded", + Short: `Token-based Lakeview APIs for embedding dashboards in external applications.`, + Long: `Token-based Lakeview APIs for embedding dashboards in external applications.`, + GroupID: "dashboards", + Annotations: map[string]string{ + "package": "dashboards", + }, + } + + // Add methods + cmd.AddCommand(newGetPublishedDashboardEmbedded()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start get-published-dashboard-embedded command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getPublishedDashboardEmbeddedOverrides []func( + *cobra.Command, + *dashboards.GetPublishedDashboardEmbeddedRequest, +) + +func newGetPublishedDashboardEmbedded() *cobra.Command { + cmd := &cobra.Command{} + + var getPublishedDashboardEmbeddedReq dashboards.GetPublishedDashboardEmbeddedRequest + + // TODO: short flags + + cmd.Use = "get-published-dashboard-embedded DASHBOARD_ID" + cmd.Short = `Read a published dashboard in an embedded ui.` + cmd.Long = `Read a published dashboard in an embedded ui. + + Get the current published dashboard within an embedded context. + + Arguments: + DASHBOARD_ID: UUID identifying the published dashboard.` + + // This command is being previewed; hide from help output. + cmd.Hidden = true + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + getPublishedDashboardEmbeddedReq.DashboardId = args[0] + + err = w.LakeviewEmbedded.GetPublishedDashboardEmbedded(ctx, getPublishedDashboardEmbeddedReq) + if err != nil { + return err + } + return nil + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getPublishedDashboardEmbeddedOverrides { + fn(cmd, &getPublishedDashboardEmbeddedReq) + } + + return cmd +} + +// end service LakeviewEmbedded diff --git a/cmd/workspace/lakeview/lakeview.go b/cmd/workspace/lakeview/lakeview.go index 6686f16da..eb2f5d8fa 100755 --- a/cmd/workspace/lakeview/lakeview.go +++ b/cmd/workspace/lakeview/lakeview.go @@ -163,13 +163,6 @@ func newCreateSchedule() *cobra.Command { cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - if cmd.Flags().Changed("json") { - err := root.ExactArgs(0)(cmd, args) - if err != nil { - return fmt.Errorf("when --json flag is specified, no positional arguments are required. 
Provide 'cron_schedule' in your JSON input") - } - return nil - } check := root.ExactArgs(1) return check(cmd, args) } @@ -242,13 +235,6 @@ func newCreateSubscription() *cobra.Command { cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - if cmd.Flags().Changed("json") { - err := root.ExactArgs(0)(cmd, args) - if err != nil { - return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'subscriber' in your JSON input") - } - return nil - } check := root.ExactArgs(2) return check(cmd, args) } @@ -1195,13 +1181,6 @@ func newUpdateSchedule() *cobra.Command { cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - if cmd.Flags().Changed("json") { - err := root.ExactArgs(0)(cmd, args) - if err != nil { - return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'cron_schedule' in your JSON input") - } - return nil - } check := root.ExactArgs(2) return check(cmd, args) } diff --git a/cmd/workspace/pipelines/pipelines.go b/cmd/workspace/pipelines/pipelines.go index 38636e83b..e94d4c5a8 100755 --- a/cmd/workspace/pipelines/pipelines.go +++ b/cmd/workspace/pipelines/pipelines.go @@ -974,6 +974,7 @@ func newUpdate() *cobra.Command { cmd.Flags().BoolVar(&updateReq.Photon, "photon", updateReq.Photon, `Whether Photon is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.PipelineId, "pipeline-id", updateReq.PipelineId, `Unique identifier for this pipeline.`) // TODO: complex arg: restart_window + // TODO: complex arg: run_as cmd.Flags().StringVar(&updateReq.Schema, "schema", updateReq.Schema, `The default schema (database) where tables are read from or published to.`) cmd.Flags().BoolVar(&updateReq.Serverless, "serverless", updateReq.Serverless, `Whether serverless compute is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.Storage, "storage", updateReq.Storage, `DBFS root directory for storing checkpoints and tables.`) diff --git a/cmd/workspace/providers/providers.go b/cmd/workspace/providers/providers.go index 504beac5e..4d6262cff 100755 --- a/cmd/workspace/providers/providers.go +++ b/cmd/workspace/providers/providers.go @@ -64,7 +64,7 @@ func newCreate() *cobra.Command { cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) cmd.Flags().StringVar(&createReq.Comment, "comment", createReq.Comment, `Description about the provider.`) - cmd.Flags().StringVar(&createReq.RecipientProfileStr, "recipient-profile-str", createReq.RecipientProfileStr, `This field is required when the __authentication_type__ is **TOKEN** or not provided.`) + cmd.Flags().StringVar(&createReq.RecipientProfileStr, "recipient-profile-str", createReq.RecipientProfileStr, `This field is required when the __authentication_type__ is **TOKEN**, **OAUTH_CLIENT_CREDENTIALS** or not provided.`) cmd.Use = "create NAME AUTHENTICATION_TYPE" cmd.Short = `Create an auth provider.` @@ -430,7 +430,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `Description about the provider.`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the provider.`) cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of Provider owner.`) - cmd.Flags().StringVar(&updateReq.RecipientProfileStr, "recipient-profile-str", updateReq.RecipientProfileStr, `This field is required when the __authentication_type__ 
is **TOKEN** or not provided.`) + cmd.Flags().StringVar(&updateReq.RecipientProfileStr, "recipient-profile-str", updateReq.RecipientProfileStr, `This field is required when the __authentication_type__ is **TOKEN**, **OAUTH_CLIENT_CREDENTIALS** or not provided.`) cmd.Use = "update NAME" cmd.Short = `Update a provider.` diff --git a/cmd/workspace/queries/queries.go b/cmd/workspace/queries/queries.go index 208f887da..bf74bb3f5 100755 --- a/cmd/workspace/queries/queries.go +++ b/cmd/workspace/queries/queries.go @@ -406,10 +406,17 @@ func newUpdate() *cobra.Command { Arguments: ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + UPDATE_MASK: The field mask must be a single string, with multiple fields separated by + commas (no spaces). The field path is relative to the resource object, + using a dot (.) to navigate sub-fields (e.g., author.given_name). + Specification of elements in sequence or map fields is not allowed, as + only the entire collection field can be specified. Field names must + exactly match the resource field names. + + A field mask of * indicates full replacement. It’s recommended to + always explicitly list the fields being updated and avoid using * + wildcards, as it can lead to unintended results if the API changes in the + future.` cmd.Annotations = make(map[string]string) diff --git a/cmd/workspace/query-execution/query-execution.go b/cmd/workspace/query-execution/query-execution.go new file mode 100755 index 000000000..ebbb90f89 --- /dev/null +++ b/cmd/workspace/query-execution/query-execution.go @@ -0,0 +1,245 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package query_execution + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/dashboards" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "query-execution", + Short: `Query execution APIs for AI / BI Dashboards.`, + Long: `Query execution APIs for AI / BI Dashboards`, + GroupID: "dashboards", + Annotations: map[string]string{ + "package": "dashboards", + }, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newCancelPublishedQueryExecution()) + cmd.AddCommand(newExecutePublishedDashboardQuery()) + cmd.AddCommand(newPollPublishedQueryStatus()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start cancel-published-query-execution command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
+var cancelPublishedQueryExecutionOverrides []func( + *cobra.Command, + *dashboards.CancelPublishedQueryExecutionRequest, +) + +func newCancelPublishedQueryExecution() *cobra.Command { + cmd := &cobra.Command{} + + var cancelPublishedQueryExecutionReq dashboards.CancelPublishedQueryExecutionRequest + + // TODO: short flags + + // TODO: array: tokens + + cmd.Use = "cancel-published-query-execution DASHBOARD_NAME DASHBOARD_REVISION_ID" + cmd.Short = `Cancel the results for a query for a published, embedded dashboard.` + cmd.Long = `Cancel the results for a query for a published, embedded dashboard.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(2) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + cancelPublishedQueryExecutionReq.DashboardName = args[0] + cancelPublishedQueryExecutionReq.DashboardRevisionId = args[1] + + response, err := w.QueryExecution.CancelPublishedQueryExecution(ctx, cancelPublishedQueryExecutionReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range cancelPublishedQueryExecutionOverrides { + fn(cmd, &cancelPublishedQueryExecutionReq) + } + + return cmd +} + +// start execute-published-dashboard-query command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var executePublishedDashboardQueryOverrides []func( + *cobra.Command, + *dashboards.ExecutePublishedDashboardQueryRequest, +) + +func newExecutePublishedDashboardQuery() *cobra.Command { + cmd := &cobra.Command{} + + var executePublishedDashboardQueryReq dashboards.ExecutePublishedDashboardQueryRequest + var executePublishedDashboardQueryJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&executePublishedDashboardQueryJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Flags().StringVar(&executePublishedDashboardQueryReq.OverrideWarehouseId, "override-warehouse-id", executePublishedDashboardQueryReq.OverrideWarehouseId, `A dashboard schedule can override the warehouse used as compute for processing the published dashboard queries.`) + + cmd.Use = "execute-published-dashboard-query DASHBOARD_NAME DASHBOARD_REVISION_ID" + cmd.Short = `Execute a query for a published dashboard.` + cmd.Long = `Execute a query for a published dashboard. + + Arguments: + DASHBOARD_NAME: Dashboard name and revision_id are required to retrieve + PublishedDatasetDataModel which contains the list of datasets, + warehouse_id, and embedded_credentials + DASHBOARD_REVISION_ID: ` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + if cmd.Flags().Changed("json") { + err := root.ExactArgs(0)(cmd, args) + if err != nil { + return fmt.Errorf("when --json flag is specified, no positional arguments are required. 
Provide 'dashboard_name', 'dashboard_revision_id' in your JSON input") + } + return nil + } + check := root.ExactArgs(2) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + diags := executePublishedDashboardQueryJson.Unmarshal(&executePublishedDashboardQueryReq) + if diags.HasError() { + return diags.Error() + } + if len(diags) > 0 { + err := cmdio.RenderDiagnosticsToErrorOut(ctx, diags) + if err != nil { + return err + } + } + } + if !cmd.Flags().Changed("json") { + executePublishedDashboardQueryReq.DashboardName = args[0] + } + if !cmd.Flags().Changed("json") { + executePublishedDashboardQueryReq.DashboardRevisionId = args[1] + } + + err = w.QueryExecution.ExecutePublishedDashboardQuery(ctx, executePublishedDashboardQueryReq) + if err != nil { + return err + } + return nil + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range executePublishedDashboardQueryOverrides { + fn(cmd, &executePublishedDashboardQueryReq) + } + + return cmd +} + +// start poll-published-query-status command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var pollPublishedQueryStatusOverrides []func( + *cobra.Command, + *dashboards.PollPublishedQueryStatusRequest, +) + +func newPollPublishedQueryStatus() *cobra.Command { + cmd := &cobra.Command{} + + var pollPublishedQueryStatusReq dashboards.PollPublishedQueryStatusRequest + + // TODO: short flags + + // TODO: array: tokens + + cmd.Use = "poll-published-query-status DASHBOARD_NAME DASHBOARD_REVISION_ID" + cmd.Short = `Poll the results for a query for a published, embedded dashboard.` + cmd.Long = `Poll the results for a query for a published, embedded dashboard.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(2) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + pollPublishedQueryStatusReq.DashboardName = args[0] + pollPublishedQueryStatusReq.DashboardRevisionId = args[1] + + response, err := w.QueryExecution.PollPublishedQueryStatus(ctx, pollPublishedQueryStatusReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
+ for _, fn := range pollPublishedQueryStatusOverrides { + fn(cmd, &pollPublishedQueryStatusReq) + } + + return cmd +} + +// end service QueryExecution diff --git a/cmd/workspace/query-visualizations/query-visualizations.go b/cmd/workspace/query-visualizations/query-visualizations.go index 621661952..2d50229ba 100755 --- a/cmd/workspace/query-visualizations/query-visualizations.go +++ b/cmd/workspace/query-visualizations/query-visualizations.go @@ -198,10 +198,17 @@ func newUpdate() *cobra.Command { Arguments: ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + UPDATE_MASK: The field mask must be a single string, with multiple fields separated by + commas (no spaces). The field path is relative to the resource object, + using a dot (.) to navigate sub-fields (e.g., author.given_name). + Specification of elements in sequence or map fields is not allowed, as + only the entire collection field can be specified. Field names must + exactly match the resource field names. + + A field mask of * indicates full replacement. It’s recommended to + always explicitly list the fields being updated and avoid using * + wildcards, as it can lead to unintended results if the API changes in the + future.` cmd.Annotations = make(map[string]string) diff --git a/cmd/workspace/recipients/recipients.go b/cmd/workspace/recipients/recipients.go index 56abd2014..6d6ce42f1 100755 --- a/cmd/workspace/recipients/recipients.go +++ b/cmd/workspace/recipients/recipients.go @@ -91,7 +91,7 @@ func newCreate() *cobra.Command { cmd.Long = `Create a share recipient. Creates a new recipient with the delta sharing authentication type in the - metastore. The caller must be a metastore admin or has the + metastore. The caller must be a metastore admin or have the **CREATE_RECIPIENT** privilege on the metastore. Arguments: @@ -186,28 +186,16 @@ func newDelete() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. Loading names for Recipients drop-down." - names, err := w.Recipients.RecipientInfoNameToMetastoreIdMap(ctx, sharing.ListRecipientsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Recipients drop-down. Please manually specify required arguments. 
Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "Name of the recipient") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have name of the recipient") - } deleteReq.Name = args[0] err = w.Recipients.Delete(ctx, deleteReq) @@ -258,28 +246,16 @@ func newGet() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. Loading names for Recipients drop-down." - names, err := w.Recipients.RecipientInfoNameToMetastoreIdMap(ctx, sharing.ListRecipientsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Recipients drop-down. Please manually specify required arguments. Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "Name of the recipient") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have name of the recipient") - } getReq.Name = args[0] response, err := w.Recipients.Get(ctx, getReq) @@ -384,7 +360,7 @@ func newRotateToken() *cobra.Command { the provided token info. The caller must be the owner of the recipient. Arguments: - NAME: The name of the recipient. + NAME: The name of the Recipient. EXISTING_TOKEN_EXPIRE_IN_SECONDS: The expiration time of the bearer token in ISO 8601 format. This will set the expiration_time of existing token only to a smaller timestamp, it cannot extend the expiration_time. Use 0 to expire the existing token @@ -479,28 +455,16 @@ func newSharePermissions() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. Loading names for Recipients drop-down." - names, err := w.Recipients.RecipientInfoNameToMetastoreIdMap(ctx, sharing.ListRecipientsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Recipients drop-down. Please manually specify required arguments. Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "The name of the Recipient") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have the name of the recipient") - } sharePermissionsReq.Name = args[0] response, err := w.Recipients.SharePermissions(ctx, sharePermissionsReq) @@ -560,6 +524,11 @@ func newUpdate() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() @@ -577,30 +546,13 @@ func newUpdate() *cobra.Command { } } } - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. 
Loading names for Recipients drop-down." - names, err := w.Recipients.RecipientInfoNameToMetastoreIdMap(ctx, sharing.ListRecipientsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Recipients drop-down. Please manually specify required arguments. Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "Name of the recipient") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have name of the recipient") - } updateReq.Name = args[0] - err = w.Recipients.Update(ctx, updateReq) + response, err := w.Recipients.Update(ctx, updateReq) if err != nil { return err } - return nil + return cmdio.Render(ctx, response) } // Disable completions since they are not applicable. diff --git a/cmd/workspace/redash-config/redash-config.go b/cmd/workspace/redash-config/redash-config.go new file mode 100755 index 000000000..1a0f37759 --- /dev/null +++ b/cmd/workspace/redash-config/redash-config.go @@ -0,0 +1,80 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package redash_config + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "redash-config", + Short: `Redash V2 service for workspace configurations (internal).`, + Long: `Redash V2 service for workspace configurations (internal)`, + GroupID: "sql", + Annotations: map[string]string{ + "package": "sql", + }, + + // This service is being previewed; hide from help output. + Hidden: true, + } + + // Add methods + cmd.AddCommand(newGetConfig()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start get-config command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getConfigOverrides []func( + *cobra.Command, +) + +func newGetConfig() *cobra.Command { + cmd := &cobra.Command{} + + cmd.Use = "get-config" + cmd.Short = `Read workspace configuration for Redash-v2.` + cmd.Long = `Read workspace configuration for Redash-v2.` + + cmd.Annotations = make(map[string]string) + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + response, err := w.RedashConfig.GetConfig(ctx) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
+ for _, fn := range getConfigOverrides { + fn(cmd) + } + + return cmd +} + +// end service RedashConfig diff --git a/cmd/workspace/serving-endpoints/serving-endpoints.go b/cmd/workspace/serving-endpoints/serving-endpoints.go index cc99177c7..645111646 100755 --- a/cmd/workspace/serving-endpoints/serving-endpoints.go +++ b/cmd/workspace/serving-endpoints/serving-endpoints.go @@ -49,6 +49,7 @@ func New() *cobra.Command { cmd.AddCommand(newGetOpenApi()) cmd.AddCommand(newGetPermissionLevels()) cmd.AddCommand(newGetPermissions()) + cmd.AddCommand(newHttpRequest()) cmd.AddCommand(newList()) cmd.AddCommand(newLogs()) cmd.AddCommand(newPatch()) @@ -153,16 +154,34 @@ func newCreate() *cobra.Command { cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) // TODO: complex arg: ai_gateway + // TODO: complex arg: config // TODO: array: rate_limits cmd.Flags().BoolVar(&createReq.RouteOptimized, "route-optimized", createReq.RouteOptimized, `Enable route optimization for the serving endpoint.`) // TODO: array: tags - cmd.Use = "create" + cmd.Use = "create NAME" cmd.Short = `Create a new serving endpoint.` - cmd.Long = `Create a new serving endpoint.` + cmd.Long = `Create a new serving endpoint. + + Arguments: + NAME: The name of the serving endpoint. This field is required and must be + unique across a Databricks workspace. An endpoint name can consist of + alphanumeric characters, dashes, and underscores.` cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + if cmd.Flags().Changed("json") { + err := root.ExactArgs(0)(cmd, args) + if err != nil { + return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'name' in your JSON input") + } + return nil + } + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() @@ -179,8 +198,9 @@ func newCreate() *cobra.Command { return err } } - } else { - return fmt.Errorf("please provide command input in JSON format by specifying the --json flag") + } + if !cmd.Flags().Changed("json") { + createReq.Name = args[0] } wait, err := w.ServingEndpoints.Create(ctx, createReq) @@ -233,10 +253,7 @@ func newDelete() *cobra.Command { cmd.Use = "delete NAME" cmd.Short = `Delete a serving endpoint.` - cmd.Long = `Delete a serving endpoint. - - Arguments: - NAME: The name of the serving endpoint. This field is required.` + cmd.Long = `Delete a serving endpoint.` cmd.Annotations = make(map[string]string) @@ -432,11 +449,12 @@ func newGetOpenApi() *cobra.Command { getOpenApiReq.Name = args[0] - err = w.ServingEndpoints.GetOpenApi(ctx, getOpenApiReq) + response, err := w.ServingEndpoints.GetOpenApi(ctx, getOpenApiReq) if err != nil { return err } - return nil + defer response.Contents.Close() + return cmdio.Render(ctx, response.Contents) } // Disable completions since they are not applicable. @@ -568,6 +586,78 @@ func newGetPermissions() *cobra.Command { return cmd } +// start http-request command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
+var httpRequestOverrides []func( + *cobra.Command, + *serving.ExternalFunctionRequest, +) + +func newHttpRequest() *cobra.Command { + cmd := &cobra.Command{} + + var httpRequestReq serving.ExternalFunctionRequest + + // TODO: short flags + + cmd.Flags().StringVar(&httpRequestReq.Headers, "headers", httpRequestReq.Headers, `Additional headers for the request.`) + cmd.Flags().StringVar(&httpRequestReq.Json, "json", httpRequestReq.Json, `The JSON payload to send in the request body.`) + cmd.Flags().StringVar(&httpRequestReq.Params, "params", httpRequestReq.Params, `Query parameters for the request.`) + + cmd.Use = "http-request CONNECTION_NAME METHOD PATH" + cmd.Short = `Make external service calls using the credentials stored in a UC Connection.` + cmd.Long = `Make external service calls using the credentials stored in a UC Connection. + + Arguments: + CONNECTION_NAME: The connection name to use. This is required to identify the external + connection. + METHOD: The HTTP method to use (e.g., 'GET', 'POST'). + PATH: The relative path for the API endpoint. This is required.` + + // This command is being previewed; hide from help output. + cmd.Hidden = true + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(3) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + httpRequestReq.ConnectionName = args[0] + _, err = fmt.Sscan(args[1], &httpRequestReq.Method) + if err != nil { + return fmt.Errorf("invalid METHOD: %s", args[1]) + } + httpRequestReq.Path = args[2] + + response, err := w.ServingEndpoints.HttpRequest(ctx, httpRequestReq) + if err != nil { + return err + } + defer response.Contents.Close() + return cmdio.Render(ctx, response.Contents) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range httpRequestOverrides { + fn(cmd, &httpRequestReq) + } + + return cmd +} + // start list command // Slice with functions to override default command behavior. @@ -849,7 +939,7 @@ func newPutAiGateway() *cobra.Command { cmd.Long = `Update AI Gateway of a serving endpoint. Used to update the AI Gateway of a serving endpoint. NOTE: Only external model - endpoints are currently supported. + and provisioned throughput endpoints are currently supported. Arguments: NAME: The name of the serving endpoint whose AI Gateway is being updated. 
This diff --git a/cmd/workspace/shares/shares.go b/cmd/workspace/shares/shares.go index f70963f29..62c3407f4 100755 --- a/cmd/workspace/shares/shares.go +++ b/cmd/workspace/shares/shares.go @@ -391,6 +391,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `User-provided free-form text description.`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the share.`) + cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of current owner of share.`) cmd.Flags().StringVar(&updateReq.StorageRoot, "storage-root", updateReq.StorageRoot, `Storage root URL for the share.`) // TODO: array: updates diff --git a/go.mod b/go.mod index ed2ff12ad..6e3c51e79 100644 --- a/go.mod +++ b/go.mod @@ -5,31 +5,35 @@ go 1.23 toolchain go1.23.4 require ( + dario.cat/mergo v1.0.1 // BSD 3-Clause + github.com/BurntSushi/toml v1.4.0 // MIT github.com/Masterminds/semver/v3 v3.3.1 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.54.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.58.1 // Apache 2.0 github.com/fatih/color v1.18.0 // MIT github.com/google/uuid v1.6.0 // BSD-3-Clause + github.com/gorilla/mux v1.8.1 // BSD 3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 github.com/hashicorp/hc-install v0.9.1 // MPL 2.0 - github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 - github.com/hashicorp/terraform-json v0.23.0 // MPL 2.0 + github.com/hashicorp/terraform-exec v0.22.0 // MPL 2.0 + github.com/hashicorp/terraform-json v0.24.0 // MPL 2.0 github.com/hexops/gotextdiff v1.0.3 // BSD 3-Clause "New" or "Revised" License github.com/manifoldco/promptui v0.9.0 // BSD-3-Clause github.com/mattn/go-isatty v0.0.20 // MIT github.com/nwidger/jsoncolor v0.3.2 // MIT github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // BSD-2-Clause github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // MIT - github.com/spf13/cobra v1.8.1 // Apache 2.0 - github.com/spf13/pflag v1.0.5 // BSD-3-Clause + github.com/spf13/cobra v1.9.1 // Apache 2.0 + github.com/spf13/pflag v1.0.6 // BSD-3-Clause github.com/stretchr/testify v1.10.0 // MIT github.com/wI2L/jsondiff v0.6.1 // MIT golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 - golang.org/x/mod v0.22.0 - golang.org/x/oauth2 v0.25.0 - golang.org/x/sync v0.10.0 - golang.org/x/term v0.28.0 - golang.org/x/text v0.21.0 + golang.org/x/mod v0.23.0 + golang.org/x/oauth2 v0.26.0 + golang.org/x/sync v0.11.0 + golang.org/x/sys v0.30.0 + golang.org/x/term v0.29.0 + golang.org/x/text v0.22.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -61,7 +65,7 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.1 // indirect github.com/tidwall/sjson v1.2.5 // indirect - github.com/zclconf/go-cty v1.15.0 // indirect + github.com/zclconf/go-cty v1.16.1 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect @@ -69,7 +73,6 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sys v0.29.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/go.sum b/go.sum index 2b9290b71..2caabeb95 100644 --- a/go.sum +++ 
b/go.sum @@ -5,9 +5,11 @@ cloud.google.com/go/auth/oauth2adapt v0.2.2 h1:+TTV8aXpjeChS9M+aTtN/TjdQnzJvmzKF cloud.google.com/go/auth/oauth2adapt v0.2.2/go.mod h1:wcYjgpZI9+Yu7LyYBg4pqSiaRkfEK3GQcpb7C/uyF1Q= cloud.google.com/go/compute/metadata v0.3.0 h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc= cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= -dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= -dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= +dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= +dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= +github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= @@ -29,11 +31,11 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.54.0 h1:L8gsA3NXs+uYU3QtW/OUgjxMQxOH24k0MT9JhB3zLlM= -github.com/databricks/databricks-sdk-go v0.54.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.58.1 h1:dUs9ZmFi7hYiL3NwLSAbxqQu66E3BzwM8EU/wcCTJ10= +github.com/databricks/databricks-sdk-go v0.58.1/go.mod h1:JpLizplEs+up9/Z4Xf2x++o3sM9eTTWFGzIXAptKJzI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -95,6 +97,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw4Z96qg= github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= +github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= +github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= @@ -105,10 +109,10 @@ github.com/hashicorp/go-version v1.7.0 
h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKe github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/hc-install v0.9.1 h1:gkqTfE3vVbafGQo6VZXcy2v5yoz2bE0+nhZXruCuODQ= github.com/hashicorp/hc-install v0.9.1/go.mod h1:pWWvN/IrfeBK4XPeXXYkL6EjMufHkCK5DvwxeLKuBf0= -github.com/hashicorp/terraform-exec v0.21.0 h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= -github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= -github.com/hashicorp/terraform-json v0.23.0 h1:sniCkExU4iKtTADReHzACkk8fnpQXrdD2xoR+lppBkI= -github.com/hashicorp/terraform-json v0.23.0/go.mod h1:MHdXbBAbSg0GvzuWazEGKAn/cyNfIB7mN6y7KJN6y2c= +github.com/hashicorp/terraform-exec v0.22.0 h1:G5+4Sz6jYZfRYUCg6eQgDsqTzkNXV+fP8l+uRmZHj64= +github.com/hashicorp/terraform-exec v0.22.0/go.mod h1:bjVbsncaeh8jVdhttWYZuBGj21FcYw6Ia/XfHcNO7lQ= +github.com/hashicorp/terraform-json v0.24.0 h1:rUiyF+x1kYawXeRth6fKFm/MdfBS6+lW4NbeATsYz8Q= +github.com/hashicorp/terraform-json v0.24.0/go.mod h1:Nfj5ubo9xbu9uiAoZVBsNOjvNKB66Oyrvtit74kC7ow= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -143,10 +147,10 @@ github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/skeema/knownhosts v1.3.0 h1:AM+y0rI04VksttfwjkSTNQorvGqmwATnvnAHpSgc0LY= github.com/skeema/knownhosts v1.3.0/go.mod h1:sPINvnADmT/qYH1kfv+ePMmOBTH6Tbl7b5LvTDjFK7M= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= +github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -172,8 +176,8 @@ github.com/wI2L/jsondiff v0.6.1 h1:ISZb9oNWbP64LHnu4AUhsMF5W0FIj5Ok3Krip9Shqpw= github.com/wI2L/jsondiff v0.6.1/go.mod h1:KAEIojdQq66oJiHhDyQez2x+sRit0vIzC9KeK0yizxM= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= -github.com/zclconf/go-cty v1.15.0 h1:tTCRWxsexYUmtt/wVxgDClUe+uQusuI443uL6e+5sXQ= -github.com/zclconf/go-cty v1.15.0/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= +github.com/zclconf/go-cty v1.16.1 h1:a5TZEPzBFFR53udlIKApXzj8JIF4ZNQ6abH79z5R1S0= +github.com/zclconf/go-cty v1.16.1/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= 
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 h1:4Pp6oUg3+e/6M4C0A/3kJ2VYa++dsWVTtGgLVj5xtHg= @@ -196,8 +200,8 @@ golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= -golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM= +golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -207,13 +211,13 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= -golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.26.0 h1:afQXWNNaeC4nvZ0Ed9XvCCzXM6UHJG7iCg0W4fPqSBE= +golang.org/x/oauth2 v0.26.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -224,14 +228,14 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= -golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= 
+golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/integration/bundle/apps_test.go b/integration/bundle/apps_test.go index f15d8aabc..12bd2fcbf 100644 --- a/integration/bundle/apps_test.go +++ b/integration/bundle/apps_test.go @@ -6,8 +6,10 @@ import ( "testing" "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" "github.com/databricks/databricks-sdk-go/service/apps" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -16,18 +18,12 @@ import ( func TestDeployBundleWithApp(t *testing.T) { ctx, wt := acc.WorkspaceTest(t) - // TODO: should only skip app run when app can be created with no_compute option. 
- if testing.Short() { - t.Log("Skip the app creation and run in short mode") - return - } - if testutil.GetCloud(t) == testutil.GCP { t.Skip("Skipping test for GCP cloud because /api/2.0/apps is temporarily unavailable there.") } uniqueId := uuid.New().String() - appId := "app-%s" + uuid.New().String()[0:8] + appId := "app-" + uuid.New().String()[0:8] nodeTypeId := testutil.GetCloud(t).NodeTypeID() instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") @@ -49,7 +45,31 @@ func TestDeployBundleWithApp(t *testing.T) { } }) - deployBundle(t, ctx, root) + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + + user, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/apps/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/apps/bundle_deploy.txt"), + ) // App should exists after bundle deployment app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) @@ -80,6 +100,29 @@ env: - name: JOB_ID value: "%d"`, job.JobId)) + // Redeploy bundle with changed config env for app and confirm it's updated in app.yaml + deployBundleWithArgs(t, ctx, root, `--var="env_var_name=ANOTHER_JOB_ID"`, "--force-lock", "--auto-approve") + reader, err = wt.W.Workspace.Download(ctx, pathToAppYml) + require.NoError(t, err) + + data, err = io.ReadAll(reader) + require.NoError(t, err) + + content = string(data) + require.Contains(t, content, fmt.Sprintf(`command: + - flask + - --app + - app + - run +env: + - name: ANOTHER_JOB_ID + value: "%d"`, job.JobId)) + + if testing.Short() { + t.Log("Skip the app run in short mode") + return + } + // Try to run the app _, out := runResourceWithStderr(t, ctx, root, "test_app") require.Contains(t, out, app.Url) diff --git a/integration/bundle/artifacts_test.go b/integration/bundle/artifacts_test.go index 94b96899e..125b5febd 100644 --- a/integration/bundle/artifacts_test.go +++ b/integration/bundle/artifacts_test.go @@ -80,7 +80,7 @@ func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) { }, } - diags := bundle.Apply(ctx, b, bundle.Seq(libraries.ExpandGlobReferences(), libraries.Upload())) + diags := bundle.ApplySeq(ctx, b, libraries.ExpandGlobReferences(), libraries.Upload()) require.NoError(t, diags.Error()) // The remote path attribute on the artifact file should have been set. @@ -144,7 +144,7 @@ func TestUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T) { }, } - diags := bundle.Apply(ctx, b, bundle.Seq(libraries.ExpandGlobReferences(), libraries.Upload())) + diags := bundle.ApplySeq(ctx, b, libraries.ExpandGlobReferences(), libraries.Upload()) require.NoError(t, diags.Error()) // The remote path attribute on the artifact file should have been set. 
@@ -213,7 +213,7 @@ func TestUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) { }, } - diags := bundle.Apply(ctx, b, bundle.Seq(libraries.ExpandGlobReferences(), libraries.Upload())) + diags := bundle.ApplySeq(ctx, b, libraries.ExpandGlobReferences(), libraries.Upload()) require.NoError(t, diags.Error()) // The remote path attribute on the artifact file should have been set. diff --git a/integration/bundle/basic_test.go b/integration/bundle/basic_test.go index 79301b850..53f8e3ef6 100644 --- a/integration/bundle/basic_test.go +++ b/integration/bundle/basic_test.go @@ -6,7 +6,9 @@ import ( "testing" "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/testdiff" "github.com/google/uuid" "github.com/stretchr/testify/require" ) @@ -35,3 +37,40 @@ func TestBasicBundleDeployWithFailOnActiveRuns(t *testing.T) { // deploy empty bundle again deployBundleWithFlags(t, ctx, root, []string{"--fail-on-active-runs"}) } + +func TestBasicBundleDeployWithDoubleUnderscoreVariables(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + nodeTypeId := testutil.GetCloud(t).NodeTypeID() + uniqueId := uuid.New().String() + root := initTestTemplate(t, ctx, "basic_with_variables", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + + currentUser, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + replacements.Set(currentUser.UserName, "$USERNAME") + + t.Cleanup(func() { + destroyBundle(t, ctx, root) + }) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/basic_with_variables/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/basic_with_variables/bundle_deploy.txt"), + ) +} diff --git a/integration/bundle/bundles/apps/template/databricks.yml.tmpl b/integration/bundle/bundles/apps/template/databricks.yml.tmpl index 4d862a06f..e0937be71 100644 --- a/integration/bundle/bundles/apps/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/apps/template/databricks.yml.tmpl @@ -4,6 +4,10 @@ bundle: workspace: root_path: "~/.bundle/{{.unique_id}}" +variables: + env_var_name: + default: "JOB_ID" + resources: apps: test_app: @@ -17,7 +21,7 @@ resources: - app - run env: - - name: JOB_ID + - name: ${var.env_var_name} value: ${resources.jobs.foo.id} resources: diff --git a/integration/bundle/bundles/basic_with_variables/databricks_template_schema.json b/integration/bundle/bundles/basic_with_variables/databricks_template_schema.json new file mode 100644 index 000000000..41a723b0f --- /dev/null +++ b/integration/bundle/bundles/basic_with_variables/databricks_template_schema.json @@ -0,0 +1,21 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + }, + "root_path": { + "type": "string", + "description": "Root path to deploy bundle to", + "default": "" + } + } +} diff --git a/integration/bundle/bundles/basic_with_variables/template/databricks.yml.tmpl 
b/integration/bundle/bundles/basic_with_variables/template/databricks.yml.tmpl new file mode 100644 index 000000000..cb02c9e2f --- /dev/null +++ b/integration/bundle/bundles/basic_with_variables/template/databricks.yml.tmpl @@ -0,0 +1,32 @@ +bundle: + name: basic + +workspace: + {{ if .root_path }} + root_path: "{{.root_path}}/.bundle/{{.unique_id}}" + {{ else }} + root_path: "~/.bundle/{{.unique_id}}" + {{ end }} + +variables: + task__key: # Note: the variable has double underscore + default: my_notebook_task + +resources: + jobs: + foo__bar: # Note: the resource has double underscore to check that TF provider can use such names + name: test-job-basic-{{.unique_id}} + tasks: + - task_key: ${var.task__key} + new_cluster: + num_workers: 1 + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + spark_python_task: + python_file: ./hello_world.py + foo: + name: test-job-basic-ref-{{.unique_id}} + tasks: + - task_key: job_task + run_job_task: + job_id: ${resources.jobs.foo__bar.id} diff --git a/integration/bundle/bundles/basic_with_variables/template/hello_world.py b/integration/bundle/bundles/basic_with_variables/template/hello_world.py new file mode 100644 index 000000000..f301245e2 --- /dev/null +++ b/integration/bundle/bundles/basic_with_variables/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl index 4ebeb2655..4ea687cf1 100644 --- a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -9,7 +9,6 @@ variables: description: The catalog the DLT pipeline should use. 
default: main - resources: pipelines: foo: @@ -19,6 +18,13 @@ resources: path: ./nb.sql development: true catalog: ${var.catalog} + target: ${resources.schemas.bar.id} + + schemas: + bar: + name: test-schema-{{.unique_id}} + catalog_name: ${var.catalog} + comment: This schema was created from DABs include: - "*.yml" diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index a537ca351..b4f9c9086 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -16,7 +16,6 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/folders" "github.com/databricks/cli/libs/template" @@ -40,10 +39,19 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles") ctx = cmdio.InContext(ctx, cmd) - out, err := filer.NewLocalClient(bundleRoot) + r := template.Resolver{ + TemplatePathOrUrl: templateRoot, + ConfigFile: configFilePath, + OutputDir: bundleRoot, + } + + tmpl, err := r.Resolve(ctx) require.NoError(t, err) - err = template.Materialize(ctx, configFilePath, os.DirFS(templateRoot), out) + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) require.NoError(t, err) + return bundleRoot } diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index c93e6b50b..ca66491ab 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -5,6 +5,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" "testing" "github.com/databricks/cli/integration/internal/acc" @@ -53,12 +54,16 @@ func testDefaultPython(t *testing.T, pythonVersion string) { uniqueProjectId := testutil.RandomName("") ctx, replacements := testdiff.WithReplacementsMap(ctx) replacements.Set(uniqueProjectId, "$UNIQUE_PRJ") + replacements.Set(strings.ToLower(uniqueProjectId), "$UNIQUE_PRJ") user, err := wt.W.CurrentUser.Me(ctx) require.NoError(t, err) require.NotNil(t, user) testdiff.PrepareReplacementsUser(t, replacements, *user) - testdiff.PrepareReplacements(t, replacements, wt.W) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) tmpDir := t.TempDir() testutil.Chdir(t, tmpDir) diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt new file mode 100644 index 000000000..437a55596 --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -0,0 +1,4 @@ +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/$UNIQUE_PRJ/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt new file mode 100644 index 000000000..567fafd24 --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/$UNIQUE_PRJ + +Validation OK! 
diff --git a/integration/bundle/testdata/basic_with_variables/bundle_deploy.txt b/integration/bundle/testdata/basic_with_variables/bundle_deploy.txt new file mode 100644 index 000000000..211164174 --- /dev/null +++ b/integration/bundle/testdata/basic_with_variables/bundle_deploy.txt @@ -0,0 +1,4 @@ +Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/integration/bundle/testdata/basic_with_variables/bundle_validate.txt b/integration/bundle/testdata/basic_with_variables/bundle_validate.txt new file mode 100644 index 000000000..dc9016a0f --- /dev/null +++ b/integration/bundle/testdata/basic_with_variables/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ + +Validation OK! diff --git a/integration/bundle/testdata/default_python/bundle_deploy.txt b/integration/bundle/testdata/default_python/bundle_deploy.txt index eef0b79b3..fe1cc4fac 100644 --- a/integration/bundle/testdata/default_python/bundle_deploy.txt +++ b/integration/bundle/testdata/default_python/bundle_deploy.txt @@ -1,6 +1,6 @@ -Building project_name_$UNIQUE_PRJ... -Uploading project_name_$UNIQUE_PRJ-0.0.1+.-py3-none-any.whl... -Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/files... +Building python_artifact... +Uploading project_name_$UNIQUE_PRJ-0.0.1+[NUMID].[NUMID]-py3-none-any.whl... +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files... Deploying resources... Updating deployment state... Deployment complete! diff --git a/integration/bundle/testdata/default_python/bundle_init.txt b/integration/bundle/testdata/default_python/bundle_init.txt index 6cfc32f98..6ea0801ad 100644 --- a/integration/bundle/testdata/default_python/bundle_init.txt +++ b/integration/bundle/testdata/default_python/bundle_init.txt @@ -1,6 +1,6 @@ Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): https://$DATABRICKS_HOST +Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): [DATABRICKS_URL] ✨ Your new project has been created in the 'project_name_$UNIQUE_PRJ' directory! diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index a0bcfdbc8..968009759 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -7,8 +7,7 @@ "exec_path": "/tmp/.../terraform" }, "git": { - "bundle_root_path": ".", - "inferred": true + "bundle_root_path": "." 
}, "mode": "development", "deployment": { @@ -16,65 +15,65 @@ "enabled": false } }, - "uuid": "" + "uuid": "[UUID]" }, "include": [ "resources/project_name_$UNIQUE_PRJ.job.yml", "resources/project_name_$UNIQUE_PRJ.pipeline.yml" ], "workspace": { - "host": "https://$DATABRICKS_HOST", + "host": "[DATABRICKS_URL]", "current_user": { "active": true, - "displayName": "$USERNAME", + "displayName": "[USERNAME]", "emails": [ { "primary": true, "type": "work", - "value": "$USERNAME" + "value": "[USERNAME]" } ], "groups": [ { - "$ref": "Groups/$USER.Groups[0]", + "$ref": "Groups/[USERGROUP]", "display": "team.engineering", "type": "direct", - "value": "$USER.Groups[0]" + "value": "[USERGROUP]" } ], - "id": "$USER.Id", + "id": "[USERID]", "name": { - "familyName": "$USERNAME", - "givenName": "$USERNAME" + "familyName": "[USERNAME]", + "givenName": "[USERNAME]" }, "schemas": [ "urn:ietf:params:scim:schemas:core:2.0:User", "urn:ietf:params:scim:schemas:extension:workspace:2.0:User" ], - "short_name": "$USERNAME", - "userName": "$USERNAME" + "short_name": "[USERNAME]", + "userName": "[USERNAME]" }, - "root_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev", - "file_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/files", - "resource_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/resources", - "artifact_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/artifacts", - "state_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/state" + "root_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev", + "file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files", + "resource_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/resources", + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/artifacts", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/state" }, "resources": { "jobs": { "project_name_$UNIQUE_PRJ_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "email_notifications": { "on_failure": [ - "$USERNAME" + "[USERNAME]" ] }, "format": "MULTI_TASK", - "id": "", + "id": "[NUMID]", "job_clusters": [ { "job_cluster_key": "job_cluster", @@ -83,24 +82,25 @@ "max_workers": 4, "min_workers": 1 }, + "data_security_mode": "SINGLE_USER", "node_type_id": "i3.xlarge", "spark_version": "15.4.x-scala2.12" } } ], "max_concurrent_runs": 4, - "name": "[dev $USERNAME] project_name_$UNIQUE_PRJ_job", + "name": "[dev [USERNAME]] project_name_$UNIQUE_PRJ_job", "queue": { "enabled": true }, "tags": { - "dev": "$USERNAME" + "dev": "[USERNAME]" }, "tasks": [ { "job_cluster_key": "job_cluster", "notebook_task": { - "notebook_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/files/src/notebook" + "notebook_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files/src/notebook" }, "task_key": "notebook_task" }, @@ -141,31 +141,31 @@ "unit": "DAYS" } }, - "url": "https://$DATABRICKS_HOST/jobs/?o=" + "url": "[DATABRICKS_URL]/jobs/[NUMID]?o=[NUMID]" } }, "pipelines": { "project_name_$UNIQUE_PRJ_pipeline": { "catalog": "main", "configuration": { - "bundle.sourcePath": 
"/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/files/src" + "bundle.sourcePath": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files/src" }, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/state/metadata.json" }, "development": true, - "id": "", + "id": "[UUID]", "libraries": [ { "notebook": { - "path": "/Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev/files/src/dlt_pipeline" + "path": "/Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files/src/dlt_pipeline" } } ], - "name": "[dev $USERNAME] project_name_$UNIQUE_PRJ_pipeline", + "name": "[dev [USERNAME]] project_name_$UNIQUE_PRJ_pipeline", "target": "project_name_$UNIQUE_PRJ_dev", - "url": "https://$DATABRICKS_HOST/pipelines/?o=" + "url": "[DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]" } } }, @@ -175,12 +175,12 @@ ] }, "presets": { - "name_prefix": "[dev $USERNAME] ", + "name_prefix": "[dev [USERNAME]] ", "pipelines_development": true, "trigger_pause_status": "PAUSED", "jobs_max_concurrent_runs": 4, "tags": { - "dev": "$USERNAME" + "dev": "[USERNAME]" } } } \ No newline at end of file diff --git a/integration/bundle/testdata/default_python/bundle_validate.txt b/integration/bundle/testdata/default_python/bundle_validate.txt index 88a5fdd18..c5c62b521 100644 --- a/integration/bundle/testdata/default_python/bundle_validate.txt +++ b/integration/bundle/testdata/default_python/bundle_validate.txt @@ -1,8 +1,8 @@ Name: project_name_$UNIQUE_PRJ Target: dev Workspace: - Host: https://$DATABRICKS_HOST - User: $USERNAME - Path: /Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev + Host: [DATABRICKS_URL] + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev Validation OK! 
diff --git a/integration/cmd/fs/cat_test.go b/integration/cmd/fs/cat_test.go index 3e964fe6e..14ec8140e 100644 --- a/integration/cmd/fs/cat_test.go +++ b/integration/cmd/fs/cat_test.go @@ -18,13 +18,11 @@ func TestFsCat(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -40,13 +38,11 @@ func TestFsCatOnADir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "dir1") require.NoError(t, err) @@ -61,13 +57,11 @@ func TestFsCatOnNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cat", path.Join(tmpDir, "non-existent-file")) assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/cp_test.go b/integration/cmd/fs/cp_test.go index 76aef7acf..6d0266555 100644 --- a/integration/cmd/fs/cp_test.go +++ b/integration/cmd/fs/cp_test.go @@ -126,14 +126,12 @@ func TestFsCpDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", sourceDir, targetDir, "--recursive") @@ -147,14 +145,12 @@ func TestFsCpFileToFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), path.Join(targetDir, "bar.txt")) @@ -168,14 +164,12 @@ func TestFsCpFileToDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), targetDir) @@ -205,14 +199,12 @@ func TestFsCpDirToDirFileNotOverwritten(t *testing.T) { t.Parallel() 
for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -231,14 +223,12 @@ func TestFsCpFileToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -255,14 +245,12 @@ func TestFsCpFileToFileFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -279,14 +267,12 @@ func TestFsCpDirToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -303,14 +289,12 @@ func TestFsCpFileToFileWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -327,14 +311,12 @@ func TestFsCpFileToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -351,13 +333,11 @@ func TestFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, 
tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cp", path.Join(tmpDir), path.Join(tmpDir, "foobar")) r := regexp.MustCompile("source path .* is a directory. Please specify the --recursive flag") @@ -376,14 +356,12 @@ func TestFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target diff --git a/integration/cmd/fs/ls_test.go b/integration/cmd/fs/ls_test.go index 25929fdf3..0f53193bf 100644 --- a/integration/cmd/fs/ls_test.go +++ b/integration/cmd/fs/ls_test.go @@ -43,13 +43,11 @@ func TestFsLs(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") @@ -77,13 +75,11 @@ func TestFsLsWithAbsolutePaths(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json", "--absolute") @@ -111,13 +107,11 @@ func TestFsLsOnFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "a", "hello.txt"), "--output=json") @@ -131,13 +125,11 @@ func TestFsLsOnEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") assert.Equal(t, "", stderr.String()) @@ -155,13 +147,11 @@ func TestFsLsForNonexistingDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "nonexistent"), "--output=json") assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/mkdir_test.go b/integration/cmd/fs/mkdir_test.go index eff0599a7..5cea0599c 100644 --- a/integration/cmd/fs/mkdir_test.go +++ b/integration/cmd/fs/mkdir_test.go @@ -17,13 +17,11 @@ func TestFsMkdir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - 
tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a")) @@ -43,13 +41,11 @@ func TestFsMkdirCreatesIntermediateDirectories(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a/b/c" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a", "b", "c")) @@ -81,13 +77,11 @@ func TestFsMkdirWhenDirectoryAlreadyExists(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" err := f.Mkdir(context.Background(), "a") diff --git a/integration/cmd/fs/rm_test.go b/integration/cmd/fs/rm_test.go index 018c7920e..fc19bb5b5 100644 --- a/integration/cmd/fs/rm_test.go +++ b/integration/cmd/fs/rm_test.go @@ -17,14 +17,12 @@ func TestFsRmFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a file ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -48,14 +46,12 @@ func TestFsRmEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -79,14 +75,12 @@ func TestFsRmNonEmptyDirectory(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -110,13 +104,11 @@ func TestFsRmForNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) // Expect error if file does not exist _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "rm", path.Join(tmpDir, "does-not-exist")) @@ -129,13 +121,11 @@ func TestFsRmDirRecursively(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // Create a directory err := f.Mkdir(context.Background(), "a") diff --git 
a/integration/cmd/sync/sync_test.go b/integration/cmd/sync/sync_test.go index 632497054..88e6ed89a 100644 --- a/integration/cmd/sync/sync_test.go +++ b/integration/cmd/sync/sync_test.go @@ -158,7 +158,7 @@ func (a *syncTest) remoteFileContent(ctx context.Context, relativePath, expected var res []byte a.c.Eventually(func() bool { - err = apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &res) + err = apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, nil, &res) require.NoError(a.t, err) actualContent := string(res) return actualContent == expectedContent diff --git a/integration/libs/filer/filer_test.go b/integration/libs/filer/filer_test.go index 21c839e1b..bc1713b30 100644 --- a/integration/libs/filer/filer_test.go +++ b/integration/libs/filer/filer_test.go @@ -128,11 +128,9 @@ func TestFilerRecursiveDelete(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -239,11 +237,9 @@ func TestFilerReadWrite(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -348,11 +344,9 @@ func TestFilerReadDir(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() commonFilerReadDirTest(t, ctx, f) diff --git a/integration/libs/locker/locker_test.go b/integration/libs/locker/locker_test.go index 524996465..93cb1ffce 100644 --- a/integration/libs/locker/locker_test.go +++ b/integration/libs/locker/locker_test.go @@ -66,9 +66,8 @@ func TestLock(t *testing.T) { } var wg sync.WaitGroup - for i := range numConcurrentLocks { + for currentIndex := range numConcurrentLocks { wg.Add(1) - currentIndex := i go func() { defer wg.Done() time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) diff --git a/integration/libs/telemetry/telemetry_test.go b/integration/libs/telemetry/telemetry_test.go new file mode 100644 index 000000000..d329c238e --- /dev/null +++ b/integration/libs/telemetry/telemetry_test.go @@ -0,0 +1,65 @@ +package telemetry + +import ( + "encoding/json" + "testing" + "time" + + "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/libs/telemetry" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/databricks/databricks-sdk-go/client" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTelemetryEndpoint(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + w := wt.W + + apiClient, err := client.New(w.Config) + require.NoError(t, err) + + logs := []protos.FrontendLog{ + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue1}, + }, + }, + }, + { + FrontendLogEventID: uuid.New().String(), + Entry: protos.FrontendLogEntry{ + DatabricksCliLog: protos.DatabricksCliLog{ + CliTestEvent: &protos.CliTestEvent{Name: 
protos.DummyCliEnumValue2}, + }, + }, + }, + } + + protoLogs := make([]string, len(logs)) + for i, log := range logs { + b, err := json.Marshal(log) + require.NoError(t, err) + protoLogs[i] = string(b) + } + + reqB := telemetry.RequestBody{ + UploadTime: time.Now().UnixMilli(), + Items: []string{}, + ProtoLogs: protoLogs, + } + + respB := telemetry.ResponseBody{} + + err = apiClient.Do(ctx, "POST", "/telemetry-ext", nil, nil, reqB, &respB) + require.NoError(t, err) + + assert.Equal(t, telemetry.ResponseBody{ + Errors: []telemetry.LogError{}, + NumProtoSuccess: int64(2), + }, respB) +} diff --git a/internal/testcli/runner.go b/internal/testcli/runner.go index d32fa3947..f462f44fc 100644 --- a/internal/testcli/runner.go +++ b/internal/testcli/runner.go @@ -39,6 +39,8 @@ type Runner struct { StderrLines <-chan string errch <-chan error + + Verbose bool } func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string { @@ -139,7 +141,9 @@ func (r *Runner) RunBackground() { go func() { err := root.Execute(ctx, cli) if err != nil { - r.Logf("Error running command: %s", err) + if r.Verbose { + r.Logf("Error running command: %s", err) + } } // Close pipes to signal EOF. @@ -154,7 +158,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stdout.Bytes())) for scanner.Scan() { - r.Logf("[databricks stdout]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stdout]: %s", scanner.Text()) + } } } @@ -162,7 +168,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stderr.Bytes())) for scanner.Scan() { - r.Logf("[databricks stderr]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stderr]: %s", scanner.Text()) + } } } @@ -196,18 +204,24 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { cli.SetErr(&stderr) cli.SetArgs(r.args) - r.Logf(" args: %s", strings.Join(r.args, ", ")) + if r.Verbose { + r.Logf(" args: %s", strings.Join(r.args, ", ")) + } err := root.Execute(ctx, cli) if err != nil { - r.Logf(" error: %s", err) + if r.Verbose { + r.Logf(" error: %s", err) + } } if stdout.Len() > 0 { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stdout.Bytes())) for scanner.Scan() { - r.Logf("stdout: %s", scanner.Text()) + if r.Verbose { + r.Logf("stdout: %s", scanner.Text()) + } } } @@ -215,7 +229,9 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stderr.Bytes())) for scanner.Scan() { - r.Logf("stderr: %s", scanner.Text()) + if r.Verbose { + r.Logf("stderr: %s", scanner.Text()) + } } } @@ -275,8 +291,9 @@ func NewRunner(t testutil.TestingT, ctx context.Context, args ...string) *Runner return &Runner{ TestingT: t, - ctx: ctx, - args: args, + ctx: ctx, + args: args, + Verbose: true, } } diff --git a/libs/auth/env.go b/libs/auth/env.go new file mode 100644 index 000000000..c58cc53e3 --- /dev/null +++ b/libs/auth/env.go @@ -0,0 +1,26 @@ +package auth + +import "github.com/databricks/databricks-sdk-go/config" + +// Env generates the authentication environment variables we need to set for +// downstream applications from the CLI to work correctly. 
+func Env(cfg *config.Config) map[string]string { + out := make(map[string]string) + for _, attr := range config.ConfigAttributes { + // Ignore profile so that downstream tools don't try and reload + // the profile. We know the current configuration is already valid since + // otherwise the CLI would have thrown an error when loading it. + if attr.Name == "profile" { + continue + } + if len(attr.EnvVars) == 0 { + continue + } + if attr.IsZero(cfg) { + continue + } + out[attr.EnvVars[0]] = attr.GetString(cfg) + } + + return out +} diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go new file mode 100644 index 000000000..be1cfc7ac --- /dev/null +++ b/libs/auth/env_test.go @@ -0,0 +1,42 @@ +package auth + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" +) + +func TestAuthEnv(t *testing.T) { + in := &config.Config{ + Profile: "thisshouldbeignored", + Host: "https://test.com", + Token: "test-token", + Password: "test-password", + MetadataServiceURL: "http://somurl.com", + + AzureUseMSI: true, + AzureTenantID: "test-tenant-id", + AzureClientID: "test-client-id", + AzureClientSecret: "test-client-secret", + + ActionsIDTokenRequestToken: "test-actions-id-token-request-token", + } + + expected := map[string]string{ + "DATABRICKS_HOST": "https://test.com", + "DATABRICKS_TOKEN": "test-token", + "DATABRICKS_PASSWORD": "test-password", + "DATABRICKS_METADATA_SERVICE_URL": "http://somurl.com", + + "ARM_USE_MSI": "true", + "ARM_TENANT_ID": "test-tenant-id", + "ARM_CLIENT_ID": "test-client-id", + "ARM_CLIENT_SECRET": "test-client-secret", + + "ACTIONS_ID_TOKEN_REQUEST_TOKEN": "test-actions-id-token-request-token", + } + + out := Env(in) + assert.Equal(t, expected, out) +} diff --git a/libs/cmdgroup/command_test.go b/libs/cmdgroup/command_test.go index 2c248f09f..20904aad0 100644 --- a/libs/cmdgroup/command_test.go +++ b/libs/cmdgroup/command_test.go @@ -41,7 +41,7 @@ func TestCommandFlagGrouping(t *testing.T) { cmd.Flags().BoolP("bool", "b", false, "Bool flag") buf := bytes.NewBuffer(nil) - cmd.SetOutput(buf) + cmd.SetOut(buf) err := cmd.Usage() require.NoError(t, err) diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index c0e9e868a..11b75157d 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -285,3 +285,14 @@ func fromContext(ctx context.Context) *cmdIO { } return io } + +// Mocks the context with a cmdio object that discards all output. +func MockDiscard(ctx context.Context) context.Context { + return InContext(ctx, &cmdIO{ + interactive: false, + outputFormat: flags.OutputText, + in: io.NopCloser(strings.NewReader("")), + out: io.Discard, + err: io.Discard, + }) +} diff --git a/libs/cmdio/logger.go b/libs/cmdio/logger.go index 7edad5bf0..48b76ce42 100644 --- a/libs/cmdio/logger.go +++ b/libs/cmdio/logger.go @@ -189,7 +189,7 @@ func (l *Logger) writeJson(event Event) { // we panic because there we cannot catch this in jobs.RunNowAndWait panic(err) } - _, _ = l.Writer.Write([]byte(b)) + _, _ = l.Writer.Write(b) _, _ = l.Writer.Write([]byte("\n")) } diff --git a/libs/daemon/daemon.go b/libs/daemon/daemon.go new file mode 100644 index 000000000..7ab9a6f81 --- /dev/null +++ b/libs/daemon/daemon.go @@ -0,0 +1,114 @@ +package daemon + +import ( + "fmt" + "io" + "os" + "os/exec" + "strconv" +) + +type Daemon struct { + // If provided, the child process's pid will be written in the file at this + // path. + PidFilePath string + + // Environment variables to set in the child process. + Env []string + + // Path to executable to run. 
If empty, the current executable is used. + Executable string + + // Arguments to pass to the child process. + Args []string + + // Log file to write the child process's output to. + LogFile string + + logFile *os.File + cmd *exec.Cmd + stdin io.WriteCloser +} + +func (d *Daemon) Start() error { + var err error + executable := d.Executable + if executable == "" { + // If Executable is not provided, use the current CLI executable. + executable, err = os.Executable() + if err != nil { + return err + } + } + + d.cmd = exec.Command(executable, d.Args...) + d.cmd.Env = d.Env + + d.cmd.SysProcAttr = sysProcAttr() + + // By default redirect stdout and stderr to /dev/null. + d.cmd.Stdout = nil + d.cmd.Stderr = nil + + // If a log file is provided, redirect stdout and stderr to the log file. + if d.LogFile != "" { + d.logFile, err = os.OpenFile(d.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return fmt.Errorf("failed to open log file: %w", err) + } + + // The file descriptor for the log file is closed in the [Daemon.Release] method. + d.cmd.Stdout = d.logFile + d.cmd.Stderr = d.logFile + } + + d.stdin, err = d.cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to get stdin pipe: %w", err) + } + + err = d.cmd.Start() + if err != nil { + return err + } + + if d.PidFilePath != "" { + err = os.WriteFile(d.PidFilePath, []byte(strconv.Itoa(d.cmd.Process.Pid)), 0o644) + if err != nil { + return fmt.Errorf("failed to write pid file: %w", err) + } + } + + return nil +} + +func (d *Daemon) WriteInput(b []byte) error { + _, err := d.stdin.Write(b) + return err +} + +func (d *Daemon) Release() error { + if d.stdin != nil { + err := d.stdin.Close() + if err != nil { + return fmt.Errorf("failed to close stdin: %w", err) + } + } + + // Note that the child process will stream its output directly to the log file. + // So it's safe to close this file handle even if the child process is still running. + if d.logFile != nil { + err := d.logFile.Close() + if err != nil { + return fmt.Errorf("failed to close log file: %w", err) + } + } + + if d.cmd == nil { + return nil + } + + // The docs for [os.Process.Release] specify that we need to call Release if + // Wait is not called. + return d.cmd.Process.Release() +} diff --git a/libs/daemon/daemon_test.go b/libs/daemon/daemon_test.go new file mode 100644 index 000000000..ee9d92baa --- /dev/null +++ b/libs/daemon/daemon_test.go @@ -0,0 +1,51 @@ +package daemon + +import ( + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/databricks/cli/internal/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDaemon(t *testing.T) { + tmpDir := t.TempDir() + cmd := exec.Command("go", "run", "internal/parent_process/main.go", tmpDir) + + // cmd.Run() will block until the parent process exits. + err := cmd.Run() + require.NoError(t, err) + + // Assert that a PID file was created for the child process. + assert.FileExists(t, filepath.Join(tmpDir, "child.pid")) + + // Wait 10 seconds for the server to start and to write the port number to + // a file. + portFilePath := filepath.Join(tmpDir, "port.txt") + assert.Eventually(t, func() bool { + _, err := os.Stat(portFilePath) + return err == nil + }, 10*time.Second, 100*time.Millisecond) + + port, err := strconv.Atoi(testutil.ReadFile(t, portFilePath)) + require.NoError(t, err) + + // Query the local server, which should be alive even after the parent process + // has exited. 
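+	// (The child stays alive because Daemon.Start detaches it from the parent:
+	// it runs in its own session via Setsid on Linux/macOS and with
+	// DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP on Windows; see
+	// daemon_unix.go and daemon_windows.go below.)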
+ r, err := http.Get("http://localhost:" + strconv.Itoa(port)) + require.NoError(t, err) + defer r.Body.Close() + + // The server should respond with "child says hi". + assert.Equal(t, http.StatusOK, r.StatusCode) + b, err := io.ReadAll(r.Body) + require.NoError(t, err) + assert.Equal(t, "child says hi", string(b)) +} diff --git a/libs/daemon/daemon_unix.go b/libs/daemon/daemon_unix.go new file mode 100644 index 000000000..b9a7023a7 --- /dev/null +++ b/libs/daemon/daemon_unix.go @@ -0,0 +1,17 @@ +//go:build linux || darwin + +package daemon + +import "syscall" + +// References: +// 1. linux: https://go.dev/src/syscall/exec_linux.go +// 2. macos (arm): https://go.dev/src/syscall/exec_libc2.go +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + // Create a new session for the child process. This ensures that the daemon + // is not terminated when the parent session is closed. This can happen + // for example when a ssh session is terminated. + Setsid: true, + } +} diff --git a/libs/daemon/daemon_windows.go b/libs/daemon/daemon_windows.go new file mode 100644 index 000000000..bccf22e4b --- /dev/null +++ b/libs/daemon/daemon_windows.go @@ -0,0 +1,16 @@ +//go:build windows + +package daemon + +import ( + "syscall" + + "golang.org/x/sys/windows" +) + +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + HideWindow: true, + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP | windows.DETACHED_PROCESS, + } +} diff --git a/libs/daemon/internal/parent_process/main.go b/libs/daemon/internal/parent_process/main.go new file mode 100644 index 000000000..87c1bdda2 --- /dev/null +++ b/libs/daemon/internal/parent_process/main.go @@ -0,0 +1,30 @@ +package main + +import ( + "os" + "path/filepath" + + "github.com/databricks/cli/libs/daemon" +) + +func main() { + tmpDir := os.Args[1] + + d := daemon.Daemon{ + PidFilePath: filepath.Join(tmpDir, "child.pid"), + Executable: "python3", + // The server script writes the port number the server is listening on + // to the specified file. + Args: []string{"./internal/parent_process/server.py", filepath.Join(tmpDir, "port.txt")}, + } + + err := d.Start() + if err != nil { + panic(err) + } + + err = d.Release() + if err != nil { + panic(err) + } +} diff --git a/libs/daemon/internal/parent_process/server.py b/libs/daemon/internal/parent_process/server.py new file mode 100644 index 000000000..ad341f992 --- /dev/null +++ b/libs/daemon/internal/parent_process/server.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import sys +from http.server import BaseHTTPRequestHandler, HTTPServer + +if len(sys.argv) < 2: + print("Usage: python script.py ") + sys.exit(1) + +port_file_path = sys.argv[1] + + +class SimpleHandler(BaseHTTPRequestHandler): + def do_GET(self): + # Send HTTP 200 response with plain text content + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + self.wfile.write(b"child says hi") + + +# Bind to localhost on port 0 to let the OS pick an available port. +server_address = ("localhost", 0) +httpd = HTTPServer(server_address, SimpleHandler) + +# Retrieve the assigned port. +assigned_port = httpd.server_address[1] + +# Write the port number to the provided file path. +with open(port_file_path, "w") as f: + f.write(str(assigned_port)) + +try: + # Automatically shut down the server after 2 minutes. This is a precaution to + # prevent the server from running indefinitely incase the GET API is never called. + httpd.timeout = 120 + + # This server will exit after one request. 
+ httpd.handle_request() +except KeyboardInterrupt: + print("\nServer is shutting down.") diff --git a/libs/databrickscfg/cfgpickers/clusters_test.go b/libs/databrickscfg/cfgpickers/clusters_test.go index 29e190a93..840916e91 100644 --- a/libs/databrickscfg/cfgpickers/clusters_test.go +++ b/libs/databrickscfg/cfgpickers/clusters_test.go @@ -1,12 +1,10 @@ package cfgpickers import ( - "bytes" "context" "testing" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/qa" "github.com/databricks/databricks-sdk-go/service/compute" @@ -114,8 +112,8 @@ func TestFirstCompatibleCluster(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) + clusterID, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.NoError(t, err) require.Equal(t, "bcd-id", clusterID) @@ -161,8 +159,7 @@ func TestNoCompatibleClusters(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) _, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.Equal(t, ErrNoCompatibleClusters, err) } diff --git a/libs/dyn/drop_keys.go b/libs/dyn/drop_keys.go new file mode 100644 index 000000000..494f9b9cd --- /dev/null +++ b/libs/dyn/drop_keys.go @@ -0,0 +1,27 @@ +package dyn + +func DropKeys(v Value, drop []string) (Value, error) { + var err error + nv, err := Walk(v, func(p Path, v Value) (Value, error) { + if len(p) == 0 { + return v, nil + } + + // Check if this key should be dropped. + for _, key := range drop { + if p[0].Key() != key { + continue + } + + return InvalidValue, ErrDrop + } + + // Pass through all other values. + return v, ErrSkip + }) + if err != nil { + return InvalidValue, err + } + + return nv, nil +} diff --git a/libs/dyn/drop_keys_test.go b/libs/dyn/drop_keys_test.go new file mode 100644 index 000000000..83a9744ca --- /dev/null +++ b/libs/dyn/drop_keys_test.go @@ -0,0 +1,24 @@ +package dyn + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDropKeysTest(t *testing.T) { + v := V(map[string]Value{ + "key1": V("value1"), + "key2": V("value2"), + "key3": V("value3"), + }) + + vout, err := DropKeys(v, []string{"key1", "key3"}) + require.NoError(t, err) + + mv := vout.MustMap() + require.Equal(t, 1, mv.Len()) + v, ok := mv.GetByString("key2") + require.True(t, ok) + require.Equal(t, "value2", v.MustString()) +} diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index a28938823..ba397267a 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -1,12 +1,16 @@ package dynvar import ( + "fmt" "regexp" "github.com/databricks/cli/libs/dyn" ) -var re = regexp.MustCompile(`\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`) +var ( + baseVarDef = `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*` + re = regexp.MustCompile(fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, baseVarDef, baseVarDef)) +) // ref represents a variable reference. 
// It is a string [dyn.Value] contained in a larger [dyn.Value]. diff --git a/libs/dyn/dynvar/ref_test.go b/libs/dyn/dynvar/ref_test.go index 4110732f8..637ecb98e 100644 --- a/libs/dyn/dynvar/ref_test.go +++ b/libs/dyn/dynvar/ref_test.go @@ -15,9 +15,13 @@ func TestNewRefNoString(t *testing.T) { func TestNewRefValidPattern(t *testing.T) { for in, refs := range map[string][]string{ - "${hello_world.world_world}": {"hello_world.world_world"}, - "${helloworld.world-world}": {"helloworld.world-world"}, - "${hello-world.world-world}": {"hello-world.world-world"}, + "${hello_world.world_world}": {"hello_world.world_world"}, + "${helloworld.world-world}": {"helloworld.world-world"}, + "${hello-world.world-world}": {"hello-world.world-world"}, + "${hello_world.world__world}": {"hello_world.world__world"}, + "${hello_world.world--world}": {"hello_world.world--world"}, + "${hello_world.world-_world}": {"hello_world.world-_world"}, + "${hello_world.world_-world}": {"hello_world.world_-world"}, } { ref, ok := newRef(dyn.V(in)) require.True(t, ok, "should match valid pattern: %s", in) @@ -36,8 +40,6 @@ func TestNewRefInvalidPattern(t *testing.T) { "${_-_._-_.id}", // cannot use _- in sequence "${0helloworld.world-world}", // interpolated first section shouldn't start with number "${helloworld.9world-world}", // interpolated second section shouldn't start with number - "${a-a.a-_a-a.id}", // fails because of -_ in the second segment - "${a-a.a--a-a.id}", // fails because of -- in the second segment } for _, v := range invalid { _, ok := newRef(dyn.V(v)) diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 0a867375d..a33ecd38e 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -81,7 +81,7 @@ func (v Value) AsInt() (int64, bool) { case int32: return int64(vv), true case int64: - return int64(vv), true + return vv, true default: return 0, false } diff --git a/libs/dyn/yamlsaver/utils.go b/libs/dyn/yamlsaver/utils.go index a162bf31f..c1b60b1b5 100644 --- a/libs/dyn/yamlsaver/utils.go +++ b/libs/dyn/yamlsaver/utils.go @@ -22,9 +22,50 @@ func ConvertToMapValue(strct any, order *Order, skipFields []string, dst map[str return dyn.InvalidValue, fmt.Errorf("expected map, got %s", mv.Kind()) } + mv, err = sortMapAlphabetically(mv) + if err != nil { + return dyn.InvalidValue, err + } + return skipAndOrder(mv, order, skipFields, dst) } +// Sort the map alphabetically by keys. This is used to produce stable output for generated YAML files. 
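+// Nested maps are sorted recursively; e.g. a value like {"b": "x", "a": {"d": "y", "c": "z"}}
+// is emitted with keys ordered as a.c, a.d, b (keys here are illustrative only).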
+func sortMapAlphabetically(mv dyn.Value) (dyn.Value, error) { + sortedMap := dyn.NewMapping() + mapV := mv.MustMap() + keys := mapV.Keys() + slices.SortStableFunc(keys, func(i, j dyn.Value) int { + iKey := i.MustString() + jKey := j.MustString() + if iKey < jKey { + return -1 + } + + if iKey > jKey { + return 1 + } + return 0 + }) + + for _, key := range keys { + value, _ := mapV.Get(key) + var err error + if value.Kind() == dyn.KindMap { + value, err = sortMapAlphabetically(value) + if err != nil { + return dyn.InvalidValue, err + } + } + err = sortedMap.Set(key, value) + if err != nil { + return dyn.InvalidValue, err + } + } + + return dyn.V(sortedMap), nil +} + func skipAndOrder(mv dyn.Value, order *Order, skipFields []string, dst map[string]dyn.Value) (dyn.Value, error) { for _, pair := range mv.MustMap().Pairs() { k := pair.Key.MustString() @@ -44,7 +85,11 @@ func skipAndOrder(mv dyn.Value, order *Order, skipFields []string, dst map[strin continue } - dst[k] = dyn.NewValue(v.Value(), []dyn.Location{{Line: order.Get(k)}}) + if order == nil { + dst[k] = v + } else { + dst[k] = dyn.NewValue(v.Value(), []dyn.Location{{Line: order.Get(k)}}) + } } return dyn.V(dst), nil diff --git a/libs/dyn/yamlsaver/utils_test.go b/libs/dyn/yamlsaver/utils_test.go index 1afab601a..f7ea3c96c 100644 --- a/libs/dyn/yamlsaver/utils_test.go +++ b/libs/dyn/yamlsaver/utils_test.go @@ -7,6 +7,54 @@ import ( assert "github.com/databricks/cli/libs/dyn/dynassert" ) +func TestConvertToMap(t *testing.T) { + type test struct { + Name string `json:"name"` + Map map[string]string `json:"map"` + List []string `json:"list"` + LongNameField string `json:"long_name_field"` + ForceSendFields []string `json:"-"` + Format string `json:"format"` + } + + v := &test{ + Name: "test", + Map: map[string]string{ + "key2": "value2", + "key1": "value1", + }, + List: []string{"a", "b", "c"}, + ForceSendFields: []string{ + "Name", + }, + LongNameField: "long name goes here", + } + result, err := ConvertToMapValue(v, nil, []string{"format"}, map[string]dyn.Value{}) + assert.NoError(t, err) + assert.Equal(t, dyn.V(map[string]dyn.Value{ + "list": dyn.NewValue( + []dyn.Value{ + dyn.V("a"), + dyn.V("b"), + dyn.V("c"), + }, + []dyn.Location{}, + ), + "long_name_field": dyn.NewValue("long name goes here", []dyn.Location{}), + "map": dyn.NewValue( + map[string]dyn.Value{ + "key1": dyn.V("value1"), + "key2": dyn.V("value2"), + }, + []dyn.Location{}, + ), + "name": dyn.NewValue( + "test", + []dyn.Location{}, + ), + }), result) +} + func TestConvertToMapValueWithOrder(t *testing.T) { type test struct { Name string `json:"name"` diff --git a/libs/exec/exec_test.go b/libs/exec/exec_test.go index c363c1f7c..f245f9dd1 100644 --- a/libs/exec/exec_test.go +++ b/libs/exec/exec_test.go @@ -85,7 +85,7 @@ func testExecutorWithShell(t *testing.T, shell string) { // Create temporary directory with only the shell executable in the PATH. 
tmpDir := t.TempDir() - t.Setenv("PATH", tmpDir) + t.Setenv("PATH", fmt.Sprintf("%s%c%s", tmpDir, os.PathListSeparator, os.Getenv("PATH"))) if runtime.GOOS == "windows" { err = os.Symlink(p, fmt.Sprintf("%s/%s.exe", tmpDir, shell)) require.NoError(t, err) diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 98a534684..7102b6e29 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -148,7 +148,7 @@ func (w *FilesClient) Write(ctx context.Context, name string, reader io.Reader, overwrite := slices.Contains(mode, OverwriteIfExists) urlPath = fmt.Sprintf("%s?overwrite=%t", urlPath, overwrite) headers := map[string]string{"Content-Type": "application/octet-stream"} - err = w.apiClient.Do(ctx, http.MethodPut, urlPath, headers, reader, nil) + err = w.apiClient.Do(ctx, http.MethodPut, urlPath, headers, nil, reader, nil) // Return early on success. if err == nil { @@ -176,7 +176,7 @@ func (w *FilesClient) Read(ctx context.Context, name string) (io.ReadCloser, err } var reader io.ReadCloser - err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &reader) + err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, nil, &reader) // Return early on success. if err == nil { @@ -303,8 +303,6 @@ func (w *FilesClient) recursiveDelete(ctx context.Context, name string) error { group.SetLimit(maxFilesRequestsInFlight) for _, file := range filesToDelete { - file := file - // Skip the file if the context has already been cancelled. select { case <-groupCtx.Done(): diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 8d5148edd..1d514f13b 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -106,7 +106,7 @@ func (info *wsfsFileInfo) MarshalJSON() ([]byte, error) { // as an interface to allow for mocking in tests. type apiClient interface { Do(ctx context.Context, method, path string, - headers map[string]string, request, response any, + headers map[string]string, queryString map[string]any, request, response any, visitors ...func(*http.Request) error) error } @@ -156,7 +156,7 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io return err } - err = w.apiClient.Do(ctx, http.MethodPost, urlPath, nil, body, nil) + err = w.apiClient.Do(ctx, http.MethodPost, urlPath, nil, nil, body, nil) // Return early on success. 
if err == nil { @@ -341,6 +341,7 @@ func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileIn http.MethodGet, "/api/2.0/workspace/get-status", nil, + nil, map[string]string{ "path": absPath, "return_export_info": "true", diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 9ee2722e1..0127d180c 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -16,7 +16,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" ) -type workspaceFilesExtensionsClient struct { +type WorkspaceFilesExtensionsClient struct { workspaceClient *databricks.WorkspaceClient wsfs Filer @@ -32,7 +32,7 @@ type workspaceFileStatus struct { nameForWorkspaceAPI string } -func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { info, err := w.wsfs.Stat(ctx, name) if err != nil { return wsfsFileInfo{}, err @@ -42,7 +42,7 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. -func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { ext := path.Ext(name) nameWithoutExt := strings.TrimSuffix(name, ext) @@ -104,7 +104,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex }, nil } -func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { stat, err := w.stat(ctx, name) if err != nil { return nil, err @@ -184,7 +184,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin filer = newWorkspaceFilesReadaheadCache(filer) } - return &workspaceFilesExtensionsClient{ + return &WorkspaceFilesExtensionsClient{ workspaceClient: w, wsfs: filer, @@ -193,7 +193,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin }, nil } -func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { +func (w *WorkspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { entries, err := w.wsfs.ReadDir(ctx, name) if err != nil { return nil, err @@ -235,7 +235,7 @@ func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name strin // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. 
-func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { +func (w *WorkspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { if w.readonly { return ReadOnlyError{"write"} } @@ -244,7 +244,7 @@ func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, } // Try to read the file as a regular file. If the file is not found, try to read it as a notebook. -func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { +func (w *WorkspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { // Ensure that the file / notebook exists. We do this check here to avoid reading // the content of a notebook called `foo` when the user actually wanted // to read the content of a file called `foo`. @@ -283,7 +283,7 @@ func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) } // Try to delete the file as a regular file. If the file is not found, try to delete it as a notebook. -func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { +func (w *WorkspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { if w.readonly { return ReadOnlyError{"delete"} } @@ -320,7 +320,7 @@ func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string } // Try to stat the file as a regular file. If the file is not found, try to stat it as a notebook. -func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { info, err := w.wsfs.Stat(ctx, name) // If the file is not found, it might be a notebook. @@ -361,7 +361,7 @@ func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. 
-func (w *workspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { +func (w *WorkspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { if w.readonly { return ReadOnlyError{"mkdir"} } diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 10a2bebf0..e9fde4762 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -17,7 +17,7 @@ type mockApiClient struct { } func (m *mockApiClient) Do(ctx context.Context, method, path string, - headers map[string]string, request, response any, + headers map[string]string, queryString map[string]any, request, response any, visitors ...func(*http.Request) error, ) error { args := m.Called(ctx, method, path, headers, request, response, visitors) @@ -181,7 +181,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { root: NewWorkspaceRootPath("/dir"), } - workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{ + workspaceFilesExtensionsClient := WorkspaceFilesExtensionsClient{ workspaceClient: mockedWorkspaceClient.WorkspaceClient, wsfs: &workspaceFilesClient, } diff --git a/libs/flags/log_level_flag.go b/libs/flags/log_level_flag.go index 836d84b70..82e2abc4c 100644 --- a/libs/flags/log_level_flag.go +++ b/libs/flags/log_level_flag.go @@ -25,7 +25,7 @@ type LogLevelFlag struct { func NewLogLevelFlag() LogLevelFlag { return LogLevelFlag{ - l: log.LevelDisabled, + l: log.LevelWarn, } } diff --git a/libs/flags/log_level_flag_test.go b/libs/flags/log_level_flag_test.go index 11a50bc45..c81f90d18 100644 --- a/libs/flags/log_level_flag_test.go +++ b/libs/flags/log_level_flag_test.go @@ -10,8 +10,8 @@ import ( func TestLogLevelFlagDefault(t *testing.T) { f := NewLogLevelFlag() - assert.Equal(t, log.LevelDisabled, f.Level()) - assert.Equal(t, "disabled", f.String()) + assert.Equal(t, log.LevelWarn, f.Level()) + assert.Equal(t, "warn", f.String()) } func TestLogLevelFlagSetValid(t *testing.T) { diff --git a/libs/git/info.go b/libs/git/info.go index 46e57be48..dc4af9b6d 100644 --- a/libs/git/info.go +++ b/libs/git/info.go @@ -66,6 +66,7 @@ func fetchRepositoryInfoAPI(ctx context.Context, path string, w *databricks.Work http.MethodGet, apiEndpoint, nil, + nil, map[string]string{ "path": path, "return_git_info": "true", diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go index 6f8f39d96..ce25cb023 100644 --- a/libs/jsonschema/from_type.go +++ b/libs/jsonschema/from_type.go @@ -111,6 +111,10 @@ func FromType(typ reflect.Type, fns []func(typ reflect.Type, s Schema) Schema) ( return res, nil } +func TypePath(typ reflect.Type) string { + return typePath(typ) +} + // typePath computes a unique string representation of the type. $ref in the generated // JSON schema will refer to this path. See TestTypePath for examples outputs. func typePath(typ reflect.Type) string { diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index e63dde359..85f6a0328 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -76,6 +76,10 @@ type Schema struct { // Title of the object, rendered as inline documentation in the IDE. // https://json-schema.org/understanding-json-schema/reference/annotations Title string `json:"title,omitempty"` + + // Examples of the value for properties in the schema. 
+ // https://json-schema.org/understanding-json-schema/reference/annotations + Examples any `json:"examples,omitempty"` } // Default value defined in a JSON Schema, represented as a string. diff --git a/libs/log/handler/friendly.go b/libs/log/handler/friendly.go index 33b88a9e2..5c60eb13d 100644 --- a/libs/log/handler/friendly.go +++ b/libs/log/handler/friendly.go @@ -53,11 +53,11 @@ func NewFriendlyHandler(out io.Writer, opts *Options) slog.Handler { // Cache (colorized) level strings. // The colors to use for each level are configured in `colors.go`. - h.levelTrace = h.sprintf(ttyColorLevelTrace, "%5s", "TRACE") - h.levelDebug = h.sprintf(ttyColorLevelDebug, "%5s", "DEBUG") - h.levelInfo = h.sprintf(ttyColorLevelInfo, "%5s", "INFO") - h.levelWarn = h.sprintf(ttyColorLevelWarn, "%5s", "WARN") - h.levelError = h.sprintf(ttyColorLevelError, "%5s", "ERROR") + h.levelTrace = h.sprintf(ttyColorLevelTrace, "%s", "Trace:") + h.levelDebug = h.sprintf(ttyColorLevelDebug, "%s", "Debug:") + h.levelInfo = h.sprintf(ttyColorLevelInfo, "%s", "Info:") + h.levelWarn = h.sprintf(ttyColorLevelWarn, "%s", "Warn:") + h.levelError = h.sprintf(ttyColorLevelError, "%s", "Error:") return h } @@ -185,33 +185,41 @@ func (s *handleState) appendAttr(a slog.Attr) { // Handle implements slog.Handler. func (h *friendlyHandler) Handle(ctx context.Context, r slog.Record) error { state := h.handleState() - state.append(h.sprintf(ttyColorTime, "%02d:%02d:%02d ", r.Time.Hour(), r.Time.Minute(), r.Time.Second())) + + if h.opts.Level.Level() <= slog.LevelDebug { + state.append(h.sprintf(ttyColorTime, "%02d:%02d:%02d ", r.Time.Hour(), r.Time.Minute(), r.Time.Second())) + } + state.appendf("%s ", h.coloredLevel(r)) state.append(h.sprint(ttyColorMessage, r.Message)) - // Handle state from WithGroup and WithAttrs. - goas := h.goas - if r.NumAttrs() == 0 { - // If the record has no Attrs, remove groups at the end of the list; they are empty. - for len(goas) > 0 && goas[len(goas)-1].group != "" { - goas = goas[:len(goas)-1] - } - } - for _, goa := range goas { - if goa.group != "" { - state.openGroup(goa.group) - } else { - for _, a := range goa.attrs { - state.appendAttr(a) + if h.opts.Level.Level() <= slog.LevelDebug { + + // Handle state from WithGroup and WithAttrs. + goas := h.goas + if r.NumAttrs() == 0 { + // If the record has no Attrs, remove groups at the end of the list; they are empty. + for len(goas) > 0 && goas[len(goas)-1].group != "" { + goas = goas[:len(goas)-1] + } + } + for _, goa := range goas { + if goa.group != "" { + state.openGroup(goa.group) + } else { + for _, a := range goa.attrs { + state.appendAttr(a) + } } } - } - // Add attributes from the record. - r.Attrs(func(a slog.Attr) bool { - state.appendAttr(a) - return true - }) + // Add attributes from the record. + r.Attrs(func(a slog.Attr) bool { + state.appendAttr(a) + return true + }) + + } // Add newline. state.append("\n") diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 40c850945..579cc1de3 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -47,7 +47,7 @@ func (f file) close() error { func (f file) readHeader() (string, error) { // Scan header line with some padding. 
buf := make([]byte, headerLength) - n, err := f.f.Read([]byte(buf)) + n, err := f.f.Read(buf) if err != nil && err != io.EOF { return "", err } diff --git a/libs/notebook/testdata/.ruff.toml b/libs/notebook/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/notebook/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/python/detect.go b/libs/python/detect.go index e86d9d621..75158da65 100644 --- a/libs/python/detect.go +++ b/libs/python/detect.go @@ -39,27 +39,7 @@ func DetectExecutable(ctx context.Context) (string, error) { // // See https://github.com/pyenv/pyenv#understanding-python-version-selection - out, err := exec.LookPath(GetExecutable()) - - // most of the OS'es have python3 in $PATH, but for those which don't, - // we perform the latest version lookup - if err != nil && !errors.Is(err, exec.ErrNotFound) { - return "", err - } - if out != "" { - return out, nil - } - // otherwise, detect all interpreters and pick the least that satisfies - // minimal version requirements - all, err := DetectInterpreters(ctx) - if err != nil { - return "", err - } - interpreter, err := all.AtLeast("3.8") - if err != nil { - return "", err - } - return interpreter.Path, nil + return exec.LookPath(GetExecutable()) } // DetectVEnvExecutable returns the path to the python3 executable inside venvPath, diff --git a/libs/python/detect_unix_test.go b/libs/python/detect_unix_test.go index a962e1f55..1774aa108 100644 --- a/libs/python/detect_unix_test.go +++ b/libs/python/detect_unix_test.go @@ -16,24 +16,16 @@ func TestDetectsViaPathLookup(t *testing.T) { assert.NotEmpty(t, py) } -func TestDetectsViaListing(t *testing.T) { - t.Setenv("PATH", "testdata/other-binaries-filtered") - ctx := context.Background() - py, err := DetectExecutable(ctx) - assert.NoError(t, err) - assert.Equal(t, "testdata/other-binaries-filtered/python3.10", py) -} - func TestDetectFailsNoInterpreters(t *testing.T) { t.Setenv("PATH", "testdata") ctx := context.Background() _, err := DetectExecutable(ctx) - assert.Equal(t, ErrNoPythonInterpreters, err) + assert.Error(t, err) } func TestDetectFailsNoMinimalVersion(t *testing.T) { t.Setenv("PATH", "testdata/no-python3") ctx := context.Background() _, err := DetectExecutable(ctx) - assert.EqualError(t, err, "cannot find Python greater or equal to v3.8.0") + assert.Error(t, err) } diff --git a/libs/python/detect_win_test.go b/libs/python/detect_win_test.go index 2ef811a4b..7b2ee281e 100644 --- a/libs/python/detect_win_test.go +++ b/libs/python/detect_win_test.go @@ -20,5 +20,5 @@ func TestDetectFailsNoInterpreters(t *testing.T) { t.Setenv("PATH", "testdata") ctx := context.Background() _, err := DetectExecutable(ctx) - assert.ErrorIs(t, err, ErrNoPythonInterpreters) + assert.Error(t, err) } diff --git a/libs/sync/testdata/.ruff.toml b/libs/sync/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/sync/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/telemetry/api.go b/libs/telemetry/api.go new file mode 100644 index 000000000..4e3f6c861 --- /dev/null +++ b/libs/telemetry/api.go @@ -0,0 +1,31 @@ +package telemetry + +// RequestBody is the request body type bindings for the /telemetry-ext API endpoint. +type RequestBody struct { + // Timestamp in millis for when the log was uploaded. + UploadTime int64 `json:"uploadTime"` + + // DO NOT USE. This is the legacy field for logging in usage logs (not lumberjack). 
+ // We keep this around because the API endpoint works only if this field is serialized + // to an empty array. + Items []string `json:"items"` + + // JSON encoded strings containing the proto logs. Since it's represented as a + // string here, the values here end up being double JSON encoded in the final + // request body. + // + // Any logs here will be logged in our lumberjack tables as long as a corresponding + // protobuf is defined in universe. + ProtoLogs []string `json:"protoLogs"` +} + +// ResponseBody is the response body type bindings for the /telemetry-ext API endpoint. +type ResponseBody struct { + Errors []LogError `json:"errors"` + NumProtoSuccess int64 `json:"numProtoSuccess"` +} + +type LogError struct { + Message string `json:"message"` + ErrorType string `json:"errorType"` +} diff --git a/libs/telemetry/protos/README.md b/libs/telemetry/protos/README.md new file mode 100644 index 000000000..7dcc75e17 --- /dev/null +++ b/libs/telemetry/protos/README.md @@ -0,0 +1,2 @@ +The types in this package are equivalent to the lumberjack protos defined in Universe. +You can find all lumberjack protos for the Databricks CLI in the `proto/logs/frontend/databricks_cli` directory. diff --git a/libs/telemetry/protos/bundle_deploy.go b/libs/telemetry/protos/bundle_deploy.go new file mode 100644 index 000000000..f3c3a360b --- /dev/null +++ b/libs/telemetry/protos/bundle_deploy.go @@ -0,0 +1,77 @@ +package protos + +type BundleDeployEvent struct { + // UUID associated with the bundle itself. Set in the `bundle.uuid` field in the bundle configuration. + BundleUuid string `json:"bundle_uuid,omitempty"` + + ResourceCount int64 `json:"resource_count,omitempty"` + ResourceJobCount int64 `json:"resource_job_count,omitempty"` + ResourcePipelineCount int64 `json:"resource_pipeline_count,omitempty"` + ResourceModelCount int64 `json:"resource_model_count,omitempty"` + ResourceExperimentCount int64 `json:"resource_experiment_count,omitempty"` + ResourceModelServingEndpointCount int64 `json:"resource_model_serving_endpoint_count,omitempty"` + ResourceRegisteredModelCount int64 `json:"resource_registered_model_count,omitempty"` + ResourceQualityMonitorCount int64 `json:"resource_quality_monitor_count,omitempty"` + ResourceSchemaCount int64 `json:"resource_schema_count,omitempty"` + ResourceVolumeCount int64 `json:"resource_volume_count,omitempty"` + ResourceClusterCount int64 `json:"resource_cluster_count,omitempty"` + ResourceDashboardCount int64 `json:"resource_dashboard_count,omitempty"` + ResourceAppCount int64 `json:"resource_app_count,omitempty"` + + // IDs of resources managed by the bundle. Some resources like volumes or schemas + // do not expose a numerical or UUID identifier and are tracked by name. Those + // resources are not tracked here since the names are PII. + ResourceJobIDs []string `json:"resource_job_ids,omitempty"` + ResourcePipelineIDs []string `json:"resource_pipeline_ids,omitempty"` + ResourceClusterIDs []string `json:"resource_cluster_ids,omitempty"` + ResourceDashboardIDs []string `json:"resource_dashboard_ids,omitempty"` + + Experimental *BundleDeployExperimental `json:"experimental,omitempty"` +} + +// These metrics are experimental and are often added in an adhoc manner. There +// are no guarantees for these metrics and they maybe removed in the future without +// any notice. +type BundleDeployExperimental struct { + // Number of configuration files in the bundle. 
+ ConfigurationFileCount int64 `json:"configuration_file_count,omitempty"` + + // Size in bytes of the Terraform state file + TerraformStateSizeBytes int64 `json:"terraform_state_size_bytes,omitempty"` + + // Number of variables in the bundle + VariableCount int64 `json:"variable_count,omitempty"` + ComplexVariableCount int64 `json:"complex_variable_count,omitempty"` + LookupVariableCount int64 `json:"lookup_variable_count,omitempty"` + + // Number of targets in the bundle + TargetCount int64 `json:"target_count,omitempty"` + + // Whether a field is set or not. If a configuration field is not present in this + // map then it is not tracked by this field. + // Keys are the full path of the field in the configuration tree. + // Examples: "bundle.terraform.exec_path", "bundle.git.branch" etc. + SetFields []BoolMapEntry `json:"set_fields,omitempty"` + + // Values for boolean configuration fields like `experimental.python_wheel_wrapper` + // We don't need to define protos to track boolean values and can simply write those + // values to this map to track them. + BoolValues []BoolMapEntry `json:"bool_values,omitempty"` + + BundleMode BundleMode `json:"bundle_mode,omitempty"` + + WorkspaceArtifactPathType BundleDeployArtifactPathType `json:"workspace_artifact_path_type,omitempty"` + + // Execution time per mutator for a selected subset of mutators. + BundleMutatorExecutionTimeMs []IntMapEntry `json:"bundle_mutator_execution_time_ms,omitempty"` +} + +type BoolMapEntry struct { + Key string `json:"key,omitempty"` + Value bool `json:"value,omitempty"` +} + +type IntMapEntry struct { + Key string `json:"key,omitempty"` + Value int64 `json:"value,omitempty"` +} diff --git a/libs/telemetry/protos/bundle_init.go b/libs/telemetry/protos/bundle_init.go new file mode 100644 index 000000000..47308a267 --- /dev/null +++ b/libs/telemetry/protos/bundle_init.go @@ -0,0 +1,37 @@ +package protos + +type BundleInitEvent struct { + // UUID associated with the DAB itself. This is serialized into the DAB + // when a user runs `databricks bundle init` and all subsequent deployments of + // that DAB can then be associated with this init event. + BundleUuid string `json:"bundle_uuid,omitempty"` + + // Name of the template initialized when the user ran `databricks bundle init` + // This is only populated when the template is a first party template like + // mlops-stacks or default-python. + TemplateName string `json:"template_name,omitempty"` + + // Arguments used by the user to initialize the template. Only enum + // values will be set here by the Databricks CLI. + // + // We use a generic map representation here because a bundle template's args are + // managed in the template itself and maintaining a copy typed schema for it here + // will be untenable in the long term. + TemplateEnumArgs []BundleInitTemplateEnumArg `json:"template_enum_args,omitempty"` +} + +type BundleInitTemplateEnumArg struct { + // Valid key values for the template. These correspond to the keys specified in + // the "properties" section of the `databricks_template_schema.json` file. + // + // Note: `databricks_template_schema.json` contains a JSON schema type specification + // for the arguments that the template accepts. + Key string `json:"key"` + + // Value that the user set for the field. This is only populated for properties + // that have the "enum" field specified in the JSON schema type specification. + // + // The Databricks CLI ensures that the value here is one of the "enum" values from + // the template specification. 
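A hedged sketch of how these enum-only template arguments could be recorded, assuming plain encoding/json marshaling; the keys and values below are taken from the default-python template schema elsewhere in this change and are illustrative only:

package main

import (
	"encoding/json"
	"fmt"

	"github.com/databricks/cli/libs/telemetry/protos"
)

func main() {
	ev := protos.BundleInitEvent{
		TemplateName: "default-python",
		// Only enum-typed answers from databricks_template_schema.json are recorded.
		TemplateEnumArgs: []protos.BundleInitTemplateEnumArg{
			{Key: "include_notebook", Value: "yes"},
			{Key: "serverless", Value: "no"},
		},
	}
	out, _ := json.Marshal(ev)
	fmt.Println(string(out))
}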
+ Value string `json:"value"` +} diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go new file mode 100644 index 000000000..9e4e59596 --- /dev/null +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -0,0 +1,35 @@ +package protos + +type ExecutionContext struct { + // UUID generated by the CLI for every CLI command run. This is also set in the HTTP user + // agent under the key "cmd-exec-id" and can be used to correlate frontend_log table + // with the http_access_log table. + CmdExecID string `json:"cmd_exec_id,omitempty"` + + // Version of the Databricks CLI used. + Version string `json:"version,omitempty"` + + // Command that was run by the user. Eg: bundle_deploy, fs_cp etc. + Command string `json:"command,omitempty"` + + // Lowercase string name for the operating system. Same value + // as the one set in `runtime.GOOS` in Golang. + OperatingSystem string `json:"operating_system,omitempty"` + + // Version of DBR from which CLI is being run. + // Only set when the CLI is being run from a Databricks cluster. + DbrVersion string `json:"dbr_version,omitempty"` + + // If true, the CLI is being run from a Databricks notebook / cluster web terminal. + FromWebTerminal bool `json:"from_web_terminal,omitempty"` + + // Time taken for the CLI command to execute. + ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"` + + // Exit code of the CLI command. + ExitCode int64 `json:"exit_code,omitempty"` +} + +type CliTestEvent struct { + Name DummyCliEnum `json:"name,omitempty"` +} diff --git a/libs/telemetry/protos/enum.go b/libs/telemetry/protos/enum.go new file mode 100644 index 000000000..7f6780cb6 --- /dev/null +++ b/libs/telemetry/protos/enum.go @@ -0,0 +1,26 @@ +package protos + +type DummyCliEnum string + +const ( + DummyCliEnumUnspecified DummyCliEnum = "DUMMY_CLI_ENUM_UNSPECIFIED" + DummyCliEnumValue1 DummyCliEnum = "VALUE1" + DummyCliEnumValue2 DummyCliEnum = "VALUE2" + DummyCliEnumValue3 DummyCliEnum = "VALUE3" +) + +type BundleMode string + +const ( + BundleModeUnspecified BundleMode = "TYPE_UNSPECIFIED" + BundleModeDevelopment BundleMode = "DEVELOPMENT" + BundleModeProduction BundleMode = "PRODUCTION" +) + +type BundleDeployArtifactPathType string + +const ( + BundleDeployArtifactPathTypeUnspecified BundleDeployArtifactPathType = "TYPE_UNSPECIFIED" + BundleDeployArtifactPathTypeWorkspace BundleDeployArtifactPathType = "WORKSPACE_FILE_SYSTEM" + BundleDeployArtifactPathTypeVolume BundleDeployArtifactPathType = "UC_VOLUME" +) diff --git a/libs/telemetry/protos/frontend_log.go b/libs/telemetry/protos/frontend_log.go new file mode 100644 index 000000000..7e6ab1012 --- /dev/null +++ b/libs/telemetry/protos/frontend_log.go @@ -0,0 +1,22 @@ +package protos + +// This corresponds to the FrontendLog lumberjack proto in universe. +// FrontendLog is the top-level struct for any client-side logs at Databricks. +type FrontendLog struct { + // A UUID for the log event generated from the CLI. 
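Putting the pieces together, here is a minimal sketch (not code from this change) of how a single event could be wrapped in the FrontendLog envelope defined in this file and attached to the /telemetry-ext request body from api.go above; it assumes plain encoding/json, import paths that follow the repository layout, and a placeholder UUID:

package main

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/databricks/cli/libs/telemetry"
	"github.com/databricks/cli/libs/telemetry/protos"
)

func main() {
	// One frontend log event: the envelope wraps a DatabricksCliLog payload.
	event := protos.FrontendLog{
		FrontendLogEventID: "00000000-0000-0000-0000-000000000000", // placeholder UUID
		Entry: protos.FrontendLogEntry{
			DatabricksCliLog: protos.DatabricksCliLog{
				ExecutionContext: &protos.ExecutionContext{
					Command:         "bundle_deploy",
					OperatingSystem: "linux",
				},
				CliTestEvent: &protos.CliTestEvent{Name: protos.DummyCliEnumValue1},
			},
		},
	}

	// Each event is JSON encoded into a string first...
	line, err := json.Marshal(event)
	if err != nil {
		panic(err)
	}

	// ...and then carried as a string in ProtoLogs, so events end up double JSON
	// encoded on the wire. Items must be an empty, non-nil slice so that it
	// serializes to [] rather than null.
	body := telemetry.RequestBody{
		UploadTime: time.Now().UnixMilli(),
		Items:      []string{},
		ProtoLogs:  []string{string(line)},
	}

	out, _ := json.Marshal(body)
	fmt.Println(string(out))
}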
+ FrontendLogEventID string `json:"frontend_log_event_id,omitempty"` + + Entry FrontendLogEntry `json:"entry,omitempty"` +} + +type FrontendLogEntry struct { + DatabricksCliLog DatabricksCliLog `json:"databricks_cli_log,omitempty"` +} + +type DatabricksCliLog struct { + ExecutionContext *ExecutionContext `json:"execution_context,omitempty"` + + CliTestEvent *CliTestEvent `json:"cli_test_event,omitempty"` + BundleInitEvent *BundleInitEvent `json:"bundle_init_event,omitempty"` + BundleDeployEvent *BundleDeployEvent `json:"bundle_deploy_event,omitempty"` +} diff --git a/libs/template/builtin.go b/libs/template/builtin.go index dcb3a8858..5b10534ef 100644 --- a/libs/template/builtin.go +++ b/libs/template/builtin.go @@ -8,14 +8,14 @@ import ( //go:embed all:templates var builtinTemplates embed.FS -// BuiltinTemplate represents a template that is built into the CLI. -type BuiltinTemplate struct { +// builtinTemplate represents a template that is built into the CLI. +type builtinTemplate struct { Name string FS fs.FS } -// Builtin returns the list of all built-in templates. -func Builtin() ([]BuiltinTemplate, error) { +// builtin returns the list of all built-in templates. +func builtin() ([]builtinTemplate, error) { templates, err := fs.Sub(builtinTemplates, "templates") if err != nil { return nil, err @@ -26,7 +26,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - var out []BuiltinTemplate + var out []builtinTemplate for _, entry := range entries { if !entry.IsDir() { continue @@ -37,7 +37,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - out = append(out, BuiltinTemplate{ + out = append(out, builtinTemplate{ Name: entry.Name(), FS: templateFS, }) diff --git a/libs/template/builtin_test.go b/libs/template/builtin_test.go index 79e04cb84..162a227ea 100644 --- a/libs/template/builtin_test.go +++ b/libs/template/builtin_test.go @@ -9,12 +9,12 @@ import ( ) func TestBuiltin(t *testing.T) { - out, err := Builtin() + out, err := builtin() require.NoError(t, err) assert.GreaterOrEqual(t, len(out), 3) // Create a map of templates by name for easier lookup - templates := make(map[string]*BuiltinTemplate) + templates := make(map[string]*builtinTemplate) for _, tmpl := range out { templates[tmpl.Name] = &tmpl } diff --git a/libs/template/materialize.go b/libs/template/materialize.go deleted file mode 100644 index 86a6a8c37..000000000 --- a/libs/template/materialize.go +++ /dev/null @@ -1,94 +0,0 @@ -package template - -import ( - "context" - "errors" - "fmt" - "io/fs" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/filer" -) - -const ( - libraryDirName = "library" - templateDirName = "template" - schemaFileName = "databricks_template_schema.json" -) - -// This function materializes the input templates as a project, using user defined -// configurations. -// Parameters: -// -// ctx: context containing a cmdio object. 
This is used to prompt the user -// configFilePath: file path containing user defined config values -// templateFS: root of the template definition -// outputFiler: filer to use for writing the initialized template -func Materialize(ctx context.Context, configFilePath string, templateFS fs.FS, outputFiler filer.Filer) error { - if _, err := fs.Stat(templateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) { - return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) - } - - config, err := newConfig(ctx, templateFS, schemaFileName) - if err != nil { - return err - } - - // Read and assign config values from file - if configFilePath != "" { - err = config.assignValuesFromFile(configFilePath) - if err != nil { - return err - } - } - - helpers := loadHelpers(ctx) - r, err := newRenderer(ctx, config.values, helpers, templateFS, templateDirName, libraryDirName) - if err != nil { - return err - } - - // Print welcome message - welcome := config.schema.WelcomeMessage - if welcome != "" { - welcome, err = r.executeTemplate(welcome) - if err != nil { - return err - } - cmdio.LogString(ctx, welcome) - } - - // Prompt user for any missing config values. Assign default values if - // terminal is not TTY - err = config.promptOrAssignDefaultValues(r) - if err != nil { - return err - } - err = config.validate() - if err != nil { - return err - } - - // Walk and render the template, since input configuration is complete - err = r.walk() - if err != nil { - return err - } - - err = r.persistToDisk(ctx, outputFiler) - if err != nil { - return err - } - - success := config.schema.SuccessMessage - if success == "" { - cmdio.LogString(ctx, "✨ Successfully initialized template") - } else { - success, err = r.executeTemplate(success) - if err != nil { - return err - } - cmdio.LogString(ctx, success) - } - return nil -} diff --git a/libs/template/materialize_test.go b/libs/template/materialize_test.go deleted file mode 100644 index c9331b43f..000000000 --- a/libs/template/materialize_test.go +++ /dev/null @@ -1,23 +0,0 @@ -package template - -import ( - "context" - "os" - "testing" - - "github.com/databricks/cli/cmd/root" - "github.com/databricks/databricks-sdk-go" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMaterializeForNonTemplateDirectory(t *testing.T) { - tmpDir := t.TempDir() - w, err := databricks.NewWorkspaceClient(&databricks.Config{}) - require.NoError(t, err) - ctx := root.SetWorkspaceClient(context.Background(), w) - - // Try to materialize a non-template directory. - err = Materialize(ctx, "", os.DirFS(tmpDir), nil) - assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at "+schemaFileName) -} diff --git a/libs/template/reader.go b/libs/template/reader.go new file mode 100644 index 000000000..8e32a75cf --- /dev/null +++ b/libs/template/reader.go @@ -0,0 +1,119 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" +) + +type Reader interface { + // FS returns a file system that contains the template + // definition files. + FS(ctx context.Context) (fs.FS, error) + + // Cleanup releases any resources associated with the reader + // like cleaning up temporary directories. 
+ Cleanup(ctx context.Context) +} + +type builtinReader struct { + name string +} + +func (r *builtinReader) FS(ctx context.Context) (fs.FS, error) { + builtin, err := builtin() + if err != nil { + return nil, err + } + + for _, entry := range builtin { + if entry.Name == r.name { + return entry.FS, nil + } + } + + return nil, fmt.Errorf("builtin template %s not found", r.name) +} + +func (r *builtinReader) Cleanup(ctx context.Context) {} + +type gitReader struct { + gitUrl string + // tag or branch to checkout + ref string + // subdirectory within the repository that contains the template + templateDir string + // temporary directory where the repository is cloned + tmpRepoDir string + + // Function to clone the repository. This is a function pointer to allow + // mocking in tests. + cloneFunc func(ctx context.Context, url, reference, targetPath string) error +} + +// Computes the repo name from the repo URL. Treats the last non empty word +// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git +// the name would be "cli.git" +func repoName(url string) string { + parts := strings.Split(strings.TrimRight(url, "/"), "/") + return parts[len(parts)-1] +} + +func (r *gitReader) FS(ctx context.Context) (fs.FS, error) { + // Calling FS twice will lead to two downloaded copies of the git repo. + // In the future if you need to call FS twice, consider adding some caching + // logic here to avoid multiple downloads. + if r.tmpRepoDir != "" { + return nil, errors.New("FS called twice on git reader") + } + + // Create a temporary directory with the name of the repository. The '*' + // character is replaced by a random string in the generated temporary directory. + repoDir, err := os.MkdirTemp("", repoName(r.gitUrl)+"-*") + if err != nil { + return nil, err + } + r.tmpRepoDir = repoDir + + // start the spinner + promptSpinner := cmdio.Spinner(ctx) + promptSpinner <- "Downloading the template\n" + + err = r.cloneFunc(ctx, r.gitUrl, r.ref, repoDir) + close(promptSpinner) + if err != nil { + return nil, err + } + + return os.DirFS(filepath.Join(repoDir, r.templateDir)), nil +} + +func (r *gitReader) Cleanup(ctx context.Context) { + if r.tmpRepoDir == "" { + return + } + + // Cleanup is best effort. Only log errors. 
+ err := os.RemoveAll(r.tmpRepoDir) + if err != nil { + log.Debugf(ctx, "Error cleaning up tmp directory %s for git template reader for URL %s: %s", r.tmpRepoDir, r.gitUrl, err) + } +} + +type localReader struct { + // Path on the local filesystem that contains the template + path string +} + +func (r *localReader) FS(ctx context.Context) (fs.FS, error) { + return os.DirFS(r.path), nil +} + +func (r *localReader) Cleanup(ctx context.Context) {} diff --git a/libs/template/reader_test.go b/libs/template/reader_test.go new file mode 100644 index 000000000..89c8d9fdf --- /dev/null +++ b/libs/template/reader_test.go @@ -0,0 +1,101 @@ +package template + +import ( + "context" + "io/fs" + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuiltInReader(t *testing.T) { + exists := []string{ + "default-python", + "default-sql", + "dbt-sql", + "experimental-jobs-as-code", + } + + for _, name := range exists { + t.Run(name, func(t *testing.T) { + r := &builtinReader{name: name} + fsys, err := r.FS(context.Background()) + assert.NoError(t, err) + assert.NotNil(t, fsys) + + // Assert file content returned is accurate and every template has a welcome + // message defined. + b, err := fs.ReadFile(fsys, "databricks_template_schema.json") + require.NoError(t, err) + assert.Contains(t, string(b), "welcome_message") + }) + } + + t.Run("doesnotexist", func(t *testing.T) { + r := &builtinReader{name: "doesnotexist"} + _, err := r.FS(context.Background()) + assert.EqualError(t, err, "builtin template doesnotexist not found") + }) +} + +func TestGitUrlReader(t *testing.T) { + ctx := cmdio.MockDiscard(context.Background()) + + var args []string + numCalls := 0 + cloneFunc := func(ctx context.Context, url, reference, targetPath string) error { + numCalls++ + args = []string{url, reference, targetPath} + testutil.WriteFile(t, filepath.Join(targetPath, "a", "b", "c", "somefile"), "somecontent") + return nil + } + r := &gitReader{ + gitUrl: "someurl", + cloneFunc: cloneFunc, + ref: "sometag", + templateDir: "a/b/c", + } + + // Assert cloneFunc is called with the correct args. + fsys, err := r.FS(ctx) + require.NoError(t, err) + require.NotEmpty(t, r.tmpRepoDir) + assert.Equal(t, 1, numCalls) + assert.DirExists(t, r.tmpRepoDir) + assert.Equal(t, []string{"someurl", "sometag", r.tmpRepoDir}, args) + + // Assert the fs returned is rooted at the templateDir. + b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) + + // Assert second call to FS returns an error. + _, err = r.FS(ctx) + assert.ErrorContains(t, err, "FS called twice on git reader") + + // Assert the downloaded repository is cleaned up. + _, err = fs.Stat(fsys, ".") + require.NoError(t, err) + r.Cleanup(ctx) + _, err = fs.Stat(fsys, ".") + assert.ErrorIs(t, err, fs.ErrNotExist) +} + +func TestLocalReader(t *testing.T) { + tmpDir := t.TempDir() + testutil.WriteFile(t, filepath.Join(tmpDir, "somefile"), "somecontent") + ctx := context.Background() + + r := &localReader{path: tmpDir} + fsys, err := r.FS(ctx) + require.NoError(t, err) + + // Assert the fs returned is rooted at correct location. 
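As a sketch of the intended call pattern for a Reader (written as if it lived in this package; readTemplateSchema is an illustrative name, not an existing helper), the file system is resolved once and Cleanup is deferred:

// Resolve the template file system once; gitReader.FS intentionally errors if
// called a second time. Cleanup is best effort: it removes the temporary clone
// for gitReader and is a no-op for builtinReader and localReader.
func readTemplateSchema(ctx context.Context, r Reader) ([]byte, error) {
	fsys, err := r.FS(ctx)
	if err != nil {
		return nil, err
	}
	defer r.Cleanup(ctx)
	return fs.ReadFile(fsys, "databricks_template_schema.json")
}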
+ b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) +} diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index b2ec388bd..97030324b 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -78,7 +78,7 @@ func assertBuiltinTemplateValid(t *testing.T, template string, settings map[stri b, err := bundle.Load(ctx, filepath.Join(tempDir, "my_project")) require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.LoadNamedTarget(target)) + diags := phases.LoadNamedTarget(ctx, b, target) require.NoError(t, diags.Error()) // Apply initialize / validation mutators @@ -93,14 +93,12 @@ func assertBuiltinTemplateValid(t *testing.T, template string, settings map[stri b.Tagging = tags.ForCloud(w.Config) b.WorkspaceClient() - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - )) + diags = phases.Initialize(ctx, b) require.NoError(t, diags.Error()) // Apply build mutator if build { - diags = bundle.Apply(ctx, b, phases.Build()) + diags = phases.Build(ctx, b) require.NoError(t, diags.Error()) } } @@ -116,14 +114,17 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { for _, includeDlt := range options { for _, includePython := range options { for _, isServicePrincipal := range []bool{true, false} { - config := map[string]any{ - "project_name": "my_project", - "include_notebook": includeNotebook, - "include_dlt": includeDlt, - "include_python": includePython, + for _, serverless := range options { + config := map[string]any{ + "project_name": "my_project", + "include_notebook": includeNotebook, + "include_dlt": includeDlt, + "include_python": includePython, + "serverless": serverless, + } + tempDir := t.TempDir() + assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir) } - tempDir := t.TempDir() - assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir) } } } @@ -135,6 +136,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) { "include_notebook": "yes", "include_dlt": "yes", "include_python": "yes", + "serverless": "yes", } isServicePrincipal = false build = true diff --git a/libs/template/resolver.go b/libs/template/resolver.go new file mode 100644 index 000000000..2cc8bf1c7 --- /dev/null +++ b/libs/template/resolver.go @@ -0,0 +1,122 @@ +package template + +import ( + "context" + "errors" + "strings" + + "github.com/databricks/cli/libs/git" +) + +var gitUrlPrefixes = []string{ + "https://", + "git@", +} + +func isRepoUrl(url string) bool { + result := false + for _, prefix := range gitUrlPrefixes { + if strings.HasPrefix(url, prefix) { + result = true + break + } + } + return result +} + +type Resolver struct { + // One of the following three: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // 3. Name of a built-in template. + TemplatePathOrUrl string + + // Path to a JSON file containing the configuration values to be used for + // template initialization. + ConfigFile string + + // Directory to write the initialized template to. + OutputDir string + + // Directory path within a Git repository containing the template. + TemplateDir string + + // Git tag or branch to download the template from. Only one of these can be + // specified. + Tag string + Branch string +} + +// ErrCustomSelected is returned when the user selects the "custom..." option +// in the prompt UI when they run `databricks bundle init`. 
This error signals +// the upstream callsite to show documentation to the user on how to use a custom +// template. +var ErrCustomSelected = errors.New("custom template selected") + +// Configures the reader and the writer for template and returns +// a handle to the template. +// Prompts the user if needed. +func (r Resolver) Resolve(ctx context.Context) (*Template, error) { + if r.Tag != "" && r.Branch != "" { + return nil, errors.New("only one of tag or branch can be specified") + } + + // Git ref to use for template initialization + ref := r.Branch + if r.Tag != "" { + ref = r.Tag + } + + var err error + var templateName TemplateName + + if r.TemplatePathOrUrl == "" { + // Prompt the user to select a template + // if a template path or URL is not provided. + templateName, err = SelectTemplate(ctx) + if err != nil { + return nil, err + } + } else { + templateName = TemplateName(r.TemplatePathOrUrl) + } + + tmpl := GetDatabricksTemplate(templateName) + + // If we could not find a databricks template with the name provided by the user, + // then we assume that the user provided us with a reference to a custom template. + // + // This reference could be one of: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // + // We resolve the appropriate reader according to the reference provided by the user. + if tmpl == nil { + tmpl = &Template{ + name: Custom, + // We use a writer that does not log verbose telemetry for custom templates. + // This is important because template definitions can contain PII that we + // do not want to centralize. + Writer: &defaultWriter{}, + } + + if isRepoUrl(r.TemplatePathOrUrl) { + tmpl.Reader = &gitReader{ + gitUrl: r.TemplatePathOrUrl, + ref: ref, + templateDir: r.TemplateDir, + cloneFunc: git.Clone, + } + } else { + tmpl.Reader = &localReader{ + path: r.TemplatePathOrUrl, + } + } + } + err = tmpl.Writer.Configure(ctx, r.ConfigFile, r.OutputDir) + if err != nil { + return nil, err + } + + return tmpl, nil +} diff --git a/libs/template/resolver_test.go b/libs/template/resolver_test.go new file mode 100644 index 000000000..1dee1c45f --- /dev/null +++ b/libs/template/resolver_test.go @@ -0,0 +1,110 @@ +package template + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTemplateResolverBothTagAndBranch(t *testing.T) { + r := Resolver{ + Tag: "tag", + Branch: "branch", + } + + _, err := r.Resolve(context.Background()) + assert.EqualError(t, err, "only one of tag or branch can be specified") +} + +func TestTemplateResolverErrorsWhenPromptingIsNotSupported(t *testing.T) { + r := Resolver{} + ctx := cmdio.MockDiscard(context.Background()) + + _, err := r.Resolve(ctx) + assert.EqualError(t, err, "prompting is not supported. 
Please specify the path, name or URL of the template to use") +} + +func TestTemplateResolverForDefaultTemplates(t *testing.T) { + for _, name := range []string{ + "default-python", + "default-sql", + "dbt-sql", + } { + t.Run(name, func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: name, + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, &builtinReader{name: name}, tmpl.Reader) + assert.IsType(t, &writerWithFullTelemetry{}, tmpl.Writer) + }) + } + + t.Run("mlops-stacks", func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "mlops-stacks", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + // Assert reader and writer configuration + assert.Equal(t, "https://github.com/databricks/mlops-stacks", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "/config/file", tmpl.Writer.(*writerWithFullTelemetry).configPath) + }) +} + +func TestTemplateResolverForCustomUrl(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "https://www.example.com/abc", + Tag: "tag", + TemplateDir: "/template/dir", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "https://www.example.com/abc", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "tag", tmpl.Reader.(*gitReader).ref) + assert.Equal(t, "/template/dir", tmpl.Reader.(*gitReader).templateDir) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestTemplateResolverForCustomPath(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "/custom/path", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "/custom/path", tmpl.Reader.(*localReader).path) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestBundleInitIsRepoUrl(t *testing.T) { + assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) + assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) + + assert.False(t, isRepoUrl("./local")) + assert.False(t, isRepoUrl("foo")) +} diff --git a/libs/template/template.go b/libs/template/template.go new file mode 100644 index 000000000..44834436b --- /dev/null +++ b/libs/template/template.go @@ -0,0 +1,140 @@ +package template + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/git" +) + +type Template struct { + Reader Reader + Writer Writer + + name TemplateName + description string + aliases []string + hidden bool +} + +type TemplateName string + +const ( + DefaultPython TemplateName = "default-python" + DefaultSql TemplateName = "default-sql" + DbtSql TemplateName = "dbt-sql" + MlopsStacks TemplateName = "mlops-stacks" + DefaultPydabs TemplateName = "default-pydabs" + Custom TemplateName = "custom" + ExperimentalJobsAsCode TemplateName = "experimental-jobs-as-code" +) + +var databricksTemplates = []Template{ + { + name: DefaultPython, + description: "The default Python template for Notebooks / Delta Live Tables / Workflows", + Reader: &builtinReader{name: string(DefaultPython)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultSql, + description: "The default SQL template for .sql files that 
run with Databricks SQL", + Reader: &builtinReader{name: string(DefaultSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DbtSql, + description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + Reader: &builtinReader{name: string(DbtSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: MlopsStacks, + description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", + aliases: []string{"mlops-stack"}, + Reader: &gitReader{gitUrl: "https://github.com/databricks/mlops-stacks", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultPydabs, + hidden: true, + description: "The default PyDABs template", + Reader: &gitReader{gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: ExperimentalJobsAsCode, + hidden: true, + description: "Jobs as code template (experimental)", + Reader: &builtinReader{name: string(ExperimentalJobsAsCode)}, + Writer: &writerWithFullTelemetry{}, + }, +} + +func HelpDescriptions() string { + var lines []string + for _, template := range databricksTemplates { + if template.name != Custom && !template.hidden { + lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) + } + } + return strings.Join(lines, "\n") +} + +var customTemplateDescription = "Bring your own template" + +func options() []cmdio.Tuple { + names := make([]cmdio.Tuple, 0, len(databricksTemplates)) + for _, template := range databricksTemplates { + if template.hidden { + continue + } + tuple := cmdio.Tuple{ + Name: string(template.name), + Id: template.description, + } + names = append(names, tuple) + } + + names = append(names, cmdio.Tuple{ + Name: "custom...", + Id: customTemplateDescription, + }) + return names +} + +func SelectTemplate(ctx context.Context) (TemplateName, error) { + if !cmdio.IsPromptSupported(ctx) { + return "", errors.New("prompting is not supported. 
Please specify the path, name or URL of the template to use") + } + description, err := cmdio.SelectOrdered(ctx, options(), "Template to use") + if err != nil { + return "", err + } + + if description == customTemplateDescription { + return TemplateName(""), ErrCustomSelected + } + + for _, template := range databricksTemplates { + if template.description == description { + return template.name, nil + } + } + + return "", fmt.Errorf("template with description %s not found", description) +} + +func GetDatabricksTemplate(name TemplateName) *Template { + for _, template := range databricksTemplates { + if template.name == name || slices.Contains(template.aliases, string(name)) { + return &template + } + } + + return nil +} diff --git a/cmd/bundle/init_test.go b/libs/template/template_test.go similarity index 59% rename from cmd/bundle/init_test.go rename to libs/template/template_test.go index 475b2e149..80391e58b 100644 --- a/cmd/bundle/init_test.go +++ b/libs/template/template_test.go @@ -1,4 +1,4 @@ -package bundle +package template import ( "testing" @@ -7,12 +7,23 @@ import ( "github.com/stretchr/testify/assert" ) -func TestBundleInitIsRepoUrl(t *testing.T) { - assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) - assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) +func TestTemplateHelpDescriptions(t *testing.T) { + expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` + assert.Equal(t, expected, HelpDescriptions()) +} - assert.False(t, isRepoUrl("./local")) - assert.False(t, isRepoUrl("foo")) +func TestTemplateOptions(t *testing.T) { + expected := []cmdio.Tuple{ + {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, + {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, + {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, + {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, + {Name: "custom...", Id: "Bring your own template"}, + } + assert.Equal(t, expected, options()) } func TestBundleInitRepoName(t *testing.T) { @@ -27,28 +38,41 @@ func TestBundleInitRepoName(t *testing.T) { assert.Equal(t, "www.github.com", repoName("https://www.github.com")) } -func TestNativeTemplateOptions(t *testing.T) { - expected := []cmdio.Tuple{ - {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, - {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, - {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, - {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, - {Name: "custom...", Id: "Bring your own template"}, +func TestTemplateTelemetryIsCapturedForAllDefaultTemplates(t *testing.T) { + for _, tmpl := range databricksTemplates { + w := tmpl.Writer + + // Assert telemetry is captured for all databricks templates, i.e. templates + // owned by databricks. 
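A rough end-to-end sketch of how Resolver and Template fit together, based on the definitions above and assuming import paths that follow the repository layout; error handling via panic is for brevity and the Writer side (configured inside Resolve) is not shown:

package main

import (
	"context"
	"errors"
	"fmt"
	"io/fs"

	"github.com/databricks/cli/libs/template"
)

func main() {
	ctx := context.Background()

	r := template.Resolver{
		// A built-in name, a local path, or a Git URL may be passed here; the
		// resolver picks the matching reader.
		TemplatePathOrUrl: "default-python",
		OutputDir:         ".",
	}

	tmpl, err := r.Resolve(ctx)
	if errors.Is(err, template.ErrCustomSelected) {
		// Only possible when TemplatePathOrUrl is empty and the user picks
		// "custom..." at the prompt; the caller is expected to show docs instead.
		return
	}
	if err != nil {
		panic(err)
	}
	defer tmpl.Reader.Cleanup(ctx)

	fsys, err := tmpl.Reader.FS(ctx)
	if err != nil {
		panic(err)
	}
	b, err := fs.ReadFile(fsys, "databricks_template_schema.json")
	if err != nil {
		panic(err)
	}
	fmt.Printf("template schema is %d bytes\n", len(b))
}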
+ assert.IsType(t, &writerWithFullTelemetry{}, w) } - assert.Equal(t, expected, nativeTemplateOptions()) } -func TestNativeTemplateHelpDescriptions(t *testing.T) { - expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows -- default-sql: The default SQL template for .sql files that run with Databricks SQL -- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) -- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` - assert.Equal(t, expected, nativeTemplateHelpDescriptions()) -} +func TestTemplateGetDatabricksTemplate(t *testing.T) { + names := []TemplateName{ + DefaultPython, + DefaultSql, + DbtSql, + MlopsStacks, + DefaultPydabs, + } -func TestGetUrlForNativeTemplate(t *testing.T) { - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stacks")) - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stack")) - assert.Equal(t, "", getUrlForNativeTemplate("default-python")) - assert.Equal(t, "", getUrlForNativeTemplate("invalid")) + for _, name := range names { + tmpl := GetDatabricksTemplate(name) + assert.Equal(t, tmpl.name, name) + } + + notExist := []string{ + "/some/path", + "doesnotexist", + "https://www.someurl.com", + } + + for _, name := range notExist { + tmpl := GetDatabricksTemplate(TemplateName(name)) + assert.Nil(t, tmpl) + } + + // Assert the alias works. + assert.Equal(t, MlopsStacks, GetDatabricksTemplate(TemplateName("mlops-stack")).name) } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/.gitignore b/libs/template/templates/dbt-sql/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..231162918 --- /dev/null +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/.gitignore @@ -0,0 +1,15 @@ +# DABs +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md + +# dbt +target/ +dbt_packages/ +dbt_modules/ +logs/ diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index ba336f6a1..d991c06ff 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -12,12 +12,12 @@ include: # The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml targets: dev: - default: true # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' # - Any job schedules and triggers are paused by default. # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development + default: true workspace: host: {{workspace_host}} @@ -25,10 +25,8 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. 
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index d53bad91a..6d42d4115 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -29,6 +29,14 @@ "enum": ["yes", "no"], "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'", "order": 4 + }, + "serverless": { + "type": "string", + "default": "no", + "enum": ["no", "yes"], + "description": "Use serverless compute", + "skip_prompt_if": {}, + "order": 5 } }, "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 53847a9c9..b8811fa3e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -38,10 +38,16 @@ The '{{.project_name}}' project was generated by using the default-python templa $ databricks bundle run ``` +{{- if (eq .include_python "no") }} 6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. -{{- if (eq .include_python "yes") }} Or read the "getting started" documentation for - **Databricks Connect** for instructions on running the included Python code from a different IDE. +{{- else }} +6. Optionally, install the Databricks extension for Visual Studio code for local development from + https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your + virtual environment and setup Databricks Connect for running unit tests locally. + When not using these tools, consult your development environment's documentation + and/or the documentation for Databricks Connect for manually setting up your environment + (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html). {{- end}} 7. For documentation on the Databricks asset bundles format used diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index 4d052e38e..04d22a764 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -22,10 +22,8 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. 
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index 5211e3894..22434aa64 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -4,6 +4,7 @@ {{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} # This job runs {{.project_name}}_pipeline on a schedule. {{end -}} +{{$with_serverless := (eq .serverless "yes") -}} resources: jobs: @@ -29,7 +30,8 @@ resources: tasks: {{- if eq .include_notebook "yes" }} - task_key: notebook_task - job_cluster_key: job_cluster + {{- if not $with_serverless}} + job_cluster_key: job_cluster{{end}} notebook_task: notebook_path: ../src/notebook.ipynb {{end -}} @@ -52,23 +54,41 @@ resources: depends_on: - task_key: notebook_task {{end}} - job_cluster_key: job_cluster + {{- if $with_serverless }} + environment_key: default + {{- else }} + job_cluster_key: job_cluster{{end}} python_wheel_task: package_name: {{.project_name}} entry_point: main + {{- if not $with_serverless }} libraries: # By default we just include the .whl file generated for the {{.project_name}} package. # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html # for more information on how to add other libraries. - whl: ../dist/*.whl +{{- end -}} +{{else}} +{{- end}} +{{if $with_serverless}} + # A list of task execution environment specifications that can be referenced by tasks of this job. 
+ environments: + - environment_key: default - {{else}} - {{end -}} + # Full documentation of this spec can be found at: + # https://docs.databricks.com/api/workspace/jobs/create#environments-spec + spec: + client: "1" + dependencies: + - ../dist/*.whl +{{ else }} job_clusters: - job_cluster_key: job_cluster new_cluster: spark_version: {{template "latest_lts_dbr_version"}} node_type_id: {{smallest_node_type}} + data_security_mode: SINGLE_USER autoscale: min_workers: 1 max_workers: 4 +{{end -}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 50f11fe2c..024c1ab15 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -1,15 +1,22 @@ +{{$with_serverless := (eq .serverless "yes") -}} # The main pipeline for {{.project_name}} resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + {{- if $with_serverless }} + ## Catalog is required for serverless compute + catalog: main{{else}} ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: - # catalog: catalog_name + # catalog: catalog_name{{end}} {{- else}} catalog: {{default_catalog}} {{- end}} target: {{.project_name}}_${bundle.target} + {{- if $with_serverless }} + serverless: true + {{- end}} libraries: - notebook: path: ../src/dlt_pipeline.ipynb diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff0..d3e9beef3 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -29,7 +29,8 @@ "source": [ {{- if (eq .include_python "yes") }} "import sys\n", - "sys.path.append('../src')\n", + "\n", + "sys.path.append(\"../src\")\n", "from {{.project_name}} import main\n", "\n", "main.get_taxis(spark).show(10)" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl index a0852c725..e3b70c605 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl @@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not be executed directly. See README.md for how to deploy, test, and run the {{.project_name}} project. """ + from setuptools import setup, find_packages import sys -sys.path.append('./src') + +sys.path.append("./src") import datetime import {{.project_name}} +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + setup( name="{{.project_name}}", # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) 
# to ensure that changes to wheel package are picked up when used on all-purpose clusters - version={{.project_name}}.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), + version={{.project_name}}.__version__ + "+" + local_version, url="https://databricks.com", author="{{user_name}}", description="wheel file based on {{.project_name}}/src", - packages=find_packages(where='./src'), - package_dir={'': 'src'}, + packages=find_packages(where="./src"), + package_dir={"": "src"}, entry_points={ "packages": [ - "main={{.project_name}}.main:main" - ] + "main={{.project_name}}.main:main", + ], }, install_requires=[ # Dependencies in case the output wheel file is used as a library dependency. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index 253ed321c..d0286639f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -35,6 +35,7 @@ "# Import DLT and src/{{.project_name}}\n", "import dlt\n", "import sys\n", + "\n", "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", "from pyspark.sql.functions import expr\n", "from {{.project_name}} import main" @@ -63,17 +64,18 @@ {{- if (eq .include_python "yes") }} "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.get_taxis(spark)\n", {{else}} "\n", "@dlt.view\n", "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", {{end -}} "\n", + "\n", "@dlt.table\n", "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index c514c6dc5..5ae344c7e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,21 +1,25 @@ from pyspark.sql import SparkSession, DataFrame + def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") + return spark.read.table("samples.nyctaxi.trips") # Create a new Databricks Connect session. If this fails, # check that you have configured Databricks Connect correctly. # See https://docs.databricks.com/dev-tools/databricks-connect.html. 
def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + def main(): - get_taxis(get_spark()).show(5) + get_taxis(get_spark()).show(5) -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/.gitignore b/libs/template/templates/default-sql/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/libs/template/templates/default-sql/template/{{.project_name}}/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index 84e07df17..6acdf40e7 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -42,7 +42,7 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} @@ -51,5 +51,3 @@ targets: permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json new file mode 100644 index 000000000..00d59af5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -0,0 +1,28 @@ +{ + "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "jobs_as_code_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." 
+      },
+      "include_notebook": {
+        "type": "string",
+        "default": "yes",
+        "enum": ["yes", "no"],
+        "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
+        "order": 2
+      },
+      "include_python": {
+        "type": "string",
+        "default": "yes",
+        "enum": ["yes", "no"],
+        "description": "Include a stub (sample) Python package in '{{.project_name}}/src'",
+        "order": 3
+      }
+    },
+    "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
+}
diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl
new file mode 100644
index 000000000..7d0c88e7d
--- /dev/null
+++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl
@@ -0,0 +1,7 @@
+{{define "latest_lts_dbr_version" -}}
+  15.4.x-scala2.12
+{{- end}}
+
+{{define "latest_lts_db_connect_version_spec" -}}
+  >=15.4,<15.5
+{{- end}}
diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl
new file mode 100644
index 000000000..2f8e8ae3e
--- /dev/null
+++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl
@@ -0,0 +1,30 @@
+# Preamble
+
+This file contains only template directives; it is skipped for the actual output.
+
+{{skip "__preamble"}}
+
+# TODO add DLT support, placeholder for now
+{{$notDLT := true }}
+{{$notNotebook := not (eq .include_notebook "yes")}}
+{{$notPython := not (eq .include_python "yes")}}
+
+{{if $notPython}}
+  {{skip "{{.project_name}}/src/{{.project_name}}"}}
+  {{skip "{{.project_name}}/tests/main_test.py"}}
+{{end}}
+
+{{if $notDLT}}
+  {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}}
+  {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}}
+{{end}}
+
+{{if $notNotebook}}
+  {{skip "{{.project_name}}/src/notebook.ipynb"}}
+{{end}}
+
+{{if (and $notDLT $notNotebook $notPython)}}
+  {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}}
+{{else}}
+  {{skip "{{.project_name}}/resources/.gitkeep"}}
+{{end}}
diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore
new file mode 100644
index 000000000..0dab7f499
--- /dev/null
+++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore
@@ -0,0 +1,8 @@
+.databricks/
+build/
+dist/
+__pycache__/
+*.egg-info
+.venv/
+scratch/**
+!scratch/README.md
diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl
new file mode 100644
index 000000000..497ce3723
--- /dev/null
+++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl
@@ -0,0 +1,60 @@
+# {{.project_name}}
+
+The '{{.project_name}}' project was generated using the "Jobs as code" template.
+
+## Prerequisites
+
+1. Install Databricks CLI 0.238 or later.
+   See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).
+
+2. Install uv.
See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + {{- end}} + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..758ec3f16 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,51 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +{{ if .include_python -}} +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +{{ end -}} +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. 
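+    #   For example, '{{.project_name}}_job' is deployed as '[dev my_user_name] {{.project_name}}_job'.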
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 000000000..ee9570302 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl new file mode 100644 index 000000000..cee0d8946 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "{{.project_name}}" +requires-python = ">=3.10" +description = "wheel file based on {{.project_name}}" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +{{ if .include_python -}} +[project.entry-points.packages] +main = "{{.project_name}}.main:main" + +{{ end -}} + +[tool.setuptools] +{{ if .include_python -}} +py-modules = ["resources", "{{.project_name}}"] + +{{ else }} +py-modules = ["resources"] + +{{ end -}} +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. 
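+    ## When rendered, the version spec below expands to ">=15.4,<15.5", matching
+    ## the 15.4 LTS runtime pinned in library/versions.tmpl.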
+ ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl new file mode 100644 index 000000000..d9d248799 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -0,0 +1,109 @@ +{{$include_dlt := "no" -}} +from databricks.bundles.jobs import Job + +""" +The main job for {{.project_name}}. + +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +This job runs {{.project_name}}_pipeline on a schedule. 
+{{end -}} +""" + + +{{.project_name}}_job = Job.from_dict( + { + "name": "{{.project_name}}_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + {{- if not is_service_principal}} + "email_notifications": { + "on_failure": [ + "{{user_name}}", + ], + }, + {{else}} + {{- end -}} + "tasks": [ + {{- if eq .include_notebook "yes" -}} + {{- "\n " -}} + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + {{- end -}} + {{- if (eq $include_dlt "yes") -}} + {{- "\n " -}} + { + "task_key": "refresh_pipeline", + {{- if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "pipeline_task": { + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} + "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", + }, + }, + {{- end -}} + {{- if (eq .include_python "yes") -}} + {{- "\n " -}} + { + "task_key": "main_task", + {{- if (eq $include_dlt "yes") }} + "depends_on": [ + { + "task_key": "refresh_pipeline", + }, + ], + {{- else if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "{{.project_name}}", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the {{.project_name}} package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. 
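+                        # For example (illustrative, not part of the original template), a PyPI
+                        # dependency could be listed alongside the wheel:
+                        #   {"pypi": {"package": "scikit-learn==1.5.0"}},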
+ { + "whl": "dist/*.whl", + }, + ], + }, + {{- end -}} + {{""}} + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "{{template "latest_lts_dbr_version"}}", + "node_type_id": "{{smallest_node_type}}", + "data_security_mode": "SINGLE_USER", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl new file mode 100644 index 000000000..c8579ae65 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -0,0 +1,24 @@ +from databricks.bundles.pipelines import Pipeline + +{{.project_name}}_pipeline = Pipeline.from_dict( + { + "name": "{{.project_name}}_pipeline", + "target": "{{.project_name}}_${bundle.target}", + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + "catalog": "catalog_name", + {{- else}} + "catalog": "{{default_catalog}}", + {{- end}} + "libraries": [ + { + "notebook": { + "path": "src/dlt_pipeline.ipynb", + }, + }, + ], + "configuration": { + "bundle.sourcePath": "${workspace.file_path}/src", + }, + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 000000000..19c9d0ebe --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the {{.project_name}} project. 
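+
+When the wheel is built through the bundle (see the 'artifacts' section in
+databricks.yml), LOCAL_VERSION is set to a build timestamp and appended as a
+PEP 440 local version identifier, producing versions such as 0.0.1+20250114.093000.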
+""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl new file mode 100644 index 000000000..629106dbf --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "# Import DLT and src/{{.project_name}}\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from {{.project_name}} import main" + {{else}} + "import dlt\n", + "from pyspark.sql.functions import expr\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + {{end -}} + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + {{else}} + "@dlt.view\n", + "def taxi_raw():\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + {{end -}} + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 000000000..6782a053b --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": 
"" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl new file mode 100644 index 000000000..e69de29bb diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 000000000..6f89fca53 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,8 @@ +from {{.project_name}}.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. 
by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/libs/template/writer.go b/libs/template/writer.go new file mode 100644 index 000000000..e3d5af583 --- /dev/null +++ b/libs/template/writer.go @@ -0,0 +1,171 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" +) + +const ( + libraryDirName = "library" + templateDirName = "template" + schemaFileName = "databricks_template_schema.json" +) + +type Writer interface { + // Configure the writer with: + // 1. The path to the config file (if any) that contains input values for the + // template. + // 2. The output directory where the template will be materialized. + Configure(ctx context.Context, configPath, outputDir string) error + + // Materialize the template to the local file system. + Materialize(ctx context.Context, r Reader) error +} + +type defaultWriter struct { + configPath string + outputFiler filer.Filer + + // Internal state + config *config + renderer *renderer +} + +func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { + outputDir, err := filepath.Abs(outputDir) + if err != nil { + return nil, err + } + + // If the CLI is running on DBR and we're writing to the workspace file system, + // use the extension-aware workspace filesystem filer to instantiate the template. + // + // It is not possible to write notebooks through the workspace filesystem's FUSE mount. + // Therefore this is the only way we can initialize templates that contain notebooks + // when running the CLI on DBR and initializing a template to the workspace. + // + if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { + return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) + } + + return filer.NewLocalClient(outputDir) +} + +func (tmpl *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error { + tmpl.configPath = configPath + + outputFiler, err := constructOutputFiler(ctx, outputDir) + if err != nil { + return err + } + + tmpl.outputFiler = outputFiler + return nil +} + +func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) error { + readerFs, err := reader.FS(ctx) + if err != nil { + return err + } + if _, err := fs.Stat(readerFs, schemaFileName); errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) + } + + tmpl.config, err = newConfig(ctx, readerFs, schemaFileName) + if err != nil { + return err + } + + // Read and assign config values from file + if tmpl.configPath != "" { + err = tmpl.config.assignValuesFromFile(tmpl.configPath) + if err != nil { + return err + } + } + + helpers := loadHelpers(ctx) + tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, helpers, readerFs, templateDirName, libraryDirName) + if err != nil { + return err + } + + // Print welcome message + welcome := tmpl.config.schema.WelcomeMessage + if welcome != "" { + welcome, err = tmpl.renderer.executeTemplate(welcome) + if err != nil { + return err + } + cmdio.LogString(ctx, welcome) + } + + // Prompt user for any missing config values. 
Assign default values if + // terminal is not TTY + err = tmpl.config.promptOrAssignDefaultValues(tmpl.renderer) + if err != nil { + return err + } + return tmpl.config.validate() +} + +func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error { + success := tmpl.config.schema.SuccessMessage + if success == "" { + cmdio.LogString(ctx, "✨ Successfully initialized template") + return nil + } + + success, err := tmpl.renderer.executeTemplate(success) + if err != nil { + return err + } + cmdio.LogString(ctx, success) + return nil +} + +func (tmpl *defaultWriter) Materialize(ctx context.Context, reader Reader) error { + err := tmpl.promptForInput(ctx, reader) + if err != nil { + return err + } + + // Walk the template file tree and compute in-memory representations of the + // output files. + err = tmpl.renderer.walk() + if err != nil { + return err + } + + // Flush the output files to disk. + err = tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler) + if err != nil { + return err + } + + return tmpl.printSuccessMessage(ctx) +} + +func (tmpl *defaultWriter) LogTelemetry(ctx context.Context) error { + // TODO, only log the template name and uuid. + return nil +} + +type writerWithFullTelemetry struct { + defaultWriter +} + +func (tmpl *writerWithFullTelemetry) LogTelemetry(ctx context.Context) error { + // TODO, log template name, uuid and enum args as well. + return nil +} diff --git a/libs/template/writer_test.go b/libs/template/writer_test.go new file mode 100644 index 000000000..9d57966ee --- /dev/null +++ b/libs/template/writer_test.go @@ -0,0 +1,58 @@ +package template + +import ( + "context" + "runtime" + "testing" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + workspaceConfig "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDefaultWriterConfigure(t *testing.T) { + // Test on local file system. + w := &defaultWriter{} + err := w.Configure(context.Background(), "/foo/bar", "/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.LocalClient{}, w.outputFiler) +} + +func TestDefaultWriterConfigureOnDBR(t *testing.T) { + // This test is not valid on windows because a DBR image is always based on + // Linux. + if runtime.GOOS == "windows" { + t.Skip("Skipping test on Windows") + } + + ctx := dbr.MockRuntime(context.Background(), true) + ctx = root.SetWorkspaceClient(ctx, &databricks.WorkspaceClient{ + Config: &workspaceConfig.Config{Host: "https://myhost.com"}, + }) + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", "/Workspace/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.WorkspaceFilesExtensionsClient{}, w.outputFiler) +} + +func TestMaterializeForNonTemplateDirectory(t *testing.T) { + tmpDir1 := t.TempDir() + tmpDir2 := t.TempDir() + ctx := context.Background() + + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", tmpDir1) + require.NoError(t, err) + + // Try to materialize a non-template directory. 
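+	// tmpDir2 contains no databricks_template_schema.json, so Materialize is
+	// expected to fail with the "not a bundle template" error asserted below.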
+ err = w.Materialize(ctx, &localReader{path: tmpDir2}) + assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at databricks_template_schema.json") +} diff --git a/libs/testdiff/context.go b/libs/testdiff/context.go new file mode 100644 index 000000000..7b6f5ff88 --- /dev/null +++ b/libs/testdiff/context.go @@ -0,0 +1,34 @@ +package testdiff + +import ( + "context" +) + +type key int + +const ( + replacementsMapKey = key(1) +) + +func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return ctx, existingMap + } + } + + newMap := &ReplacementsContext{} + ctx = context.WithValue(ctx, replacementsMapKey, newMap) + return ctx, newMap +} + +func GetReplacementsMap(ctx context.Context) *ReplacementsContext { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return existingMap + } + } + return nil +} diff --git a/libs/testdiff/context_test.go b/libs/testdiff/context_test.go new file mode 100644 index 000000000..5a0191009 --- /dev/null +++ b/libs/testdiff/context_test.go @@ -0,0 +1,30 @@ +package testdiff + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetReplacementsMap_Nil(t *testing.T) { + ctx := context.Background() + repls := GetReplacementsMap(ctx) + assert.Nil(t, repls) +} + +func TestGetReplacementsMap_NotNil(t *testing.T) { + ctx := context.Background() + ctx, _ = WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.NotNil(t, repls) +} + +func TestWithReplacementsMap_UseExisting(t *testing.T) { + ctx := context.Background() + ctx, r1 := WithReplacementsMap(ctx) + ctx, r2 := WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.Equal(t, r1, repls) + assert.Equal(t, r2, repls) +} diff --git a/libs/testdiff/golden.go b/libs/testdiff/golden.go index 08d1e9608..c1c51b6c5 100644 --- a/libs/testdiff/golden.go +++ b/libs/testdiff/golden.go @@ -3,17 +3,11 @@ package testdiff import ( "context" "flag" - "fmt" "os" - "regexp" - "slices" "strings" "testing" "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/iamutil" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) @@ -71,12 +65,6 @@ func AssertOutputJQ(t testutil.TestingT, ctx context.Context, out, outTitle, exp } } -var ( - uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) - numIdRegex = regexp.MustCompile(`[0-9]{3,}`) - privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) -) - func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string { t.Helper() out = NormalizeNewlines(out) @@ -84,136 +72,7 @@ func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string if replacements == nil { t.Fatal("WithReplacementsMap was not called") } - out = replacements.Replace(out) - out = uuidRegex.ReplaceAllString(out, "") - out = numIdRegex.ReplaceAllString(out, "") - out = privatePathRegex.ReplaceAllString(out, "/tmp/.../$3") - - return out -} - -type key int - -const ( - replacementsMapKey = key(1) -) - -type Replacement struct { - Old string - New string -} - -type ReplacementsContext struct { - Repls []Replacement -} - -func (r *ReplacementsContext) Replace(s string) string { - // QQQ Should 
probably only replace whole words - for _, repl := range r.Repls { - s = strings.ReplaceAll(s, repl.Old, repl.New) - } - return s -} - -func (r *ReplacementsContext) Set(old, new string) { - if old == "" || new == "" { - return - } - r.Repls = append(r.Repls, Replacement{Old: old, New: new}) -} - -func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return ctx, existingMap - } - } - - newMap := &ReplacementsContext{} - ctx = context.WithValue(ctx, replacementsMapKey, newMap) - return ctx, newMap -} - -func GetReplacementsMap(ctx context.Context) *ReplacementsContext { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return existingMap - } - } - return nil -} - -func PrepareReplacements(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { - t.Helper() - // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) - host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") - r.Set(host, "$DATABRICKS_HOST") - r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") - r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") - r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") - r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") - r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") - r.Set(w.Config.Token, "$DATABRICKS_TOKEN") - r.Set(w.Config.Username, "$DATABRICKS_USERNAME") - r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") - r.Set(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") - r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") - r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") - r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") - r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") - r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") - // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") - r.Set(w.Config.AzureClientID, "$USERNAME") - r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") - r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") - r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") - r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") - r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") - r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") - r.Set(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") - // This is set to words like "path" that happen too frequently - // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") -} - -func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { - t.Helper() - // There could be exact matches or overlap between different name fields, so sort them by length - // to ensure we match the largest one first and map them all to the same token - names := []string{ - u.DisplayName, - u.UserName, - iamutil.GetShortUserName(&u), - } - if u.Name != nil { - names = append(names, u.Name.FamilyName) - names = append(names, u.Name.GivenName) - } - for _, val := range u.Emails { - names = append(names, val.Value) - } - stableSortReverseLength(names) - - for _, name := range names { - r.Set(name, "$USERNAME") - } - - for ind, val := range u.Groups { - r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) - } - - r.Set(u.Id, "$USER.Id") - - for ind, val := range u.Roles { - 
r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) - } -} - -func stableSortReverseLength(strs []string) { - slices.SortStableFunc(strs, func(a, b string) int { - return len(b) - len(a) - }) + return replacements.Replace(out) } func NormalizeNewlines(input string) string { diff --git a/libs/testdiff/golden_test.go b/libs/testdiff/golden_test.go deleted file mode 100644 index 0fc32be21..000000000 --- a/libs/testdiff/golden_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package testdiff - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSort(t *testing.T) { - input := []string{"a", "bc", "cd"} - stableSortReverseLength(input) - assert.Equal(t, []string{"bc", "cd", "a"}, input) -} diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go new file mode 100644 index 000000000..d4d5eb27b --- /dev/null +++ b/libs/testdiff/replacement.go @@ -0,0 +1,228 @@ +package testdiff + +import ( + "encoding/json" + "path/filepath" + "regexp" + "runtime" + "slices" + "strings" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/iam" + "golang.org/x/mod/semver" +) + +const ( + testerName = "[USERNAME]" +) + +var ( + uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) + numIdRegex = regexp.MustCompile(`[0-9]{3,}`) + privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) + // Version could v0.0.0-dev+21e1aacf518a or just v0.0.0-dev (the latter is currently the case on Windows) + devVersionRegex = regexp.MustCompile(`0\.0\.0-dev(\+[a-f0-9]{10,16})?`) +) + +type Replacement struct { + Old *regexp.Regexp + New string +} + +type ReplacementsContext struct { + Repls []Replacement +} + +func (r *ReplacementsContext) Clone() ReplacementsContext { + return ReplacementsContext{Repls: slices.Clone(r.Repls)} +} + +func (r *ReplacementsContext) Replace(s string) string { + // QQQ Should probably only replace whole words + for _, repl := range r.Repls { + s = repl.Old.ReplaceAllString(s, repl.New) + } + return s +} + +func (r *ReplacementsContext) append(pattern *regexp.Regexp, replacement string) { + r.Repls = append(r.Repls, Replacement{ + Old: pattern, + New: replacement, + }) +} + +func (r *ReplacementsContext) appendLiteral(old, new string) { + r.append( + // Transform the input strings such that they can be used as literal strings in regular expressions. + regexp.MustCompile(regexp.QuoteMeta(old)), + // Transform the replacement string such that `$` is interpreted as a literal dollar sign. + // For more information about how the replacement string is used, see [regexp.Regexp.Expand]. + strings.ReplaceAll(new, `$`, `$$`), + ) +} + +func (r *ReplacementsContext) Set(old, new string) { + if old == "" || new == "" { + return + } + + // Always include both verbatim and json version of replacement. + // This helps when the string in question contains \ or other chars that need to be quoted. + // In that case we cannot rely that json(old) == '"{old}"' and need to add it explicitly. 
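+	// For example, registering old = `foo"bar` also registers its JSON-encoded
+	// form `foo\"bar`, so Replace works on raw output as well as on JSON output
+	// (see TestReplacement_Encoded in replacement_test.go).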
+ + encodedNew, err := json.Marshal(new) + if err == nil { + encodedOld, err := json.Marshal(old) + if err == nil { + encodedStrNew := trimQuotes(string(encodedNew)) + encodedStrOld := trimQuotes(string(encodedOld)) + if encodedStrNew != new || encodedStrOld != old { + r.appendLiteral(encodedStrOld, encodedStrNew) + } + } + } + + r.appendLiteral(old, new) +} + +func trimQuotes(s string) string { + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + return s +} + +func (r *ReplacementsContext) SetPath(old, new string) { + if old != "" && old != "." { + // Converts C:\Users\DENIS~1.BIL -> C:\Users\denis.bilenko + oldEvalled, err1 := filepath.EvalSymlinks(old) + if err1 == nil && oldEvalled != old { + r.SetPathNoEval(oldEvalled, new) + } + } + + r.SetPathNoEval(old, new) +} + +func (r *ReplacementsContext) SetPathNoEval(old, new string) { + r.Set(old, new) + + if runtime.GOOS != "windows" { + return + } + + // Support both forward and backward slashes + m1 := strings.ReplaceAll(old, "\\", "/") + if m1 != old { + r.Set(m1, new) + } + + m2 := strings.ReplaceAll(old, "/", "\\") + if m2 != old && m2 != m1 { + r.Set(m2, new) + } +} + +func (r *ReplacementsContext) SetPathWithParents(old, new string) { + r.SetPath(old, new) + r.SetPath(filepath.Dir(old), new+"_PARENT") + r.SetPath(filepath.Dir(filepath.Dir(old)), new+"_GPARENT") +} + +func PrepareReplacementsWorkspaceClient(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { + t.Helper() + // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) + host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") + r.Set("https://"+host, "[DATABRICKS_URL]") + r.Set("http://"+host, "[DATABRICKS_URL]") + r.Set(host, "[DATABRICKS_HOST]") + r.Set(w.Config.ClusterID, "[DATABRICKS_CLUSTER_ID]") + r.Set(w.Config.WarehouseID, "[DATABRICKS_WAREHOUSE_ID]") + r.Set(w.Config.ServerlessComputeID, "[DATABRICKS_SERVERLESS_COMPUTE_ID]") + r.Set(w.Config.AccountID, "[DATABRICKS_ACCOUNT_ID]") + r.Set(w.Config.Username, "[DATABRICKS_USERNAME]") + r.SetPath(w.Config.Profile, "[DATABRICKS_CONFIG_PROFILE]") + r.Set(w.Config.ConfigFile, "[DATABRICKS_CONFIG_FILE]") + r.Set(w.Config.GoogleServiceAccount, "[DATABRICKS_GOOGLE_SERVICE_ACCOUNT]") + r.Set(w.Config.AzureResourceID, "[DATABRICKS_AZURE_RESOURCE_ID]") + r.Set(w.Config.AzureClientID, testerName) + r.Set(w.Config.AzureTenantID, "[ARM_TENANT_ID]") + r.Set(w.Config.AzureEnvironment, "[ARM_ENVIRONMENT]") + r.Set(w.Config.ClientID, "[DATABRICKS_CLIENT_ID]") + r.SetPath(w.Config.DatabricksCliPath, "[DATABRICKS_CLI_PATH]") + // This is set to words like "path" that happen too frequently + // r.Set(w.Config.AuthType, "[DATABRICKS_AUTH_TYPE]") +} + +func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { + t.Helper() + // There could be exact matches or overlap between different name fields, so sort them by length + // to ensure we match the largest one first and map them all to the same token + + r.Set(u.UserName, testerName) + r.Set(u.DisplayName, testerName) + if u.Name != nil { + r.Set(u.Name.FamilyName, testerName) + r.Set(u.Name.GivenName, testerName) + } + + for _, val := range u.Emails { + r.Set(val.Value, testerName) + } + + r.Set(iamutil.GetShortUserName(&u), testerName) + + for _, val := range u.Groups { + r.Set(val.Value, "[USERGROUP]") + } + + r.Set(u.Id, "[USERID]") + + for _, val := range u.Roles { + r.Set(val.Value, 
"[USERROLE]") + } +} + +func PrepareReplacementsUUID(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(uuidRegex, "[UUID]") +} + +func PrepareReplacementsNumber(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(numIdRegex, "[NUMID]") +} + +func PrepareReplacementsTemporaryDirectory(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(privatePathRegex, "/tmp/.../$3") +} + +func PrepareReplacementsDevVersion(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(devVersionRegex, "[DEV_VERSION]") +} + +func PrepareReplacementSdkVersion(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.Set(databricks.Version(), "[SDK_VERSION]") +} + +func goVersion() string { + gv := runtime.Version() + ssv := strings.ReplaceAll(gv, "go", "v") + sv := semver.Canonical(ssv) + return strings.TrimPrefix(sv, "v") +} + +func PrepareReplacementsGoVersion(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.Set(goVersion(), "[GO_VERSION]") +} diff --git a/libs/testdiff/replacement_test.go b/libs/testdiff/replacement_test.go new file mode 100644 index 000000000..1b6c5fe2d --- /dev/null +++ b/libs/testdiff/replacement_test.go @@ -0,0 +1,46 @@ +package testdiff + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReplacement_Literal(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foobar`, `[replacement]`) + assert.Equal(t, `[replacement]`, repls.Replace(`foobar`)) +} + +func TestReplacement_Encoded(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foo"bar`, `[replacement]`) + assert.Equal(t, `"[replacement]"`, repls.Replace(`"foo\"bar"`)) +} + +func TestReplacement_UUID(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsUUID(t, &repls) + + assert.Equal(t, "[UUID]", repls.Replace("123e4567-e89b-12d3-a456-426614174000")) +} + +func TestReplacement_Number(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsNumber(t, &repls) + + assert.Equal(t, "12", repls.Replace("12")) + assert.Equal(t, "[NUMID]", repls.Replace("123")) +} + +func TestReplacement_TemporaryDirectory(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsTemporaryDirectory(t, &repls) + + assert.Equal(t, "/tmp/.../tail", repls.Replace("/tmp/foo/bar/qux/tail")) +} diff --git a/libs/testdiff/testdiff.go b/libs/testdiff/testdiff.go index fef1d5ae2..f65adf7f7 100644 --- a/libs/testdiff/testdiff.go +++ b/libs/testdiff/testdiff.go @@ -17,18 +17,20 @@ func UnifiedDiff(filename1, filename2, s1, s2 string) string { return fmt.Sprint(gotextdiff.ToUnified(filename1, filename2, s1, edits)) } -func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) { +func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) bool { t.Helper() if len(out) < 1000 && len(expected) < 1000 { // This shows full strings + diff which could be useful when debugging newlines - assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) + return assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) } else { // only show diff for large texts diff := UnifiedDiff(filename1, filename2, expected, out) if diff != "" { - t.Errorf("Diff:\n" + diff) + t.Error("Diff:\n" + diff) + return false } } + return true } func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) { diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go new file mode 100644 index 
000000000..4e943f828 --- /dev/null +++ b/libs/testserver/fake_workspace.go @@ -0,0 +1,175 @@ +package testserver + +import ( + "bytes" + "encoding/json" + "fmt" + "sort" + "strconv" + "strings" + + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +// FakeWorkspace holds a state of a workspace for acceptance tests. +type FakeWorkspace struct { + directories map[string]bool + files map[string][]byte + // normally, ids are not sequential, but we make them sequential for deterministic diff + nextJobId int64 + jobs map[int64]jobs.Job +} + +func NewFakeWorkspace() *FakeWorkspace { + return &FakeWorkspace{ + directories: map[string]bool{ + "/Workspace": true, + }, + files: map[string][]byte{}, + jobs: map[int64]jobs.Job{}, + nextJobId: 1, + } +} + +func (s *FakeWorkspace) WorkspaceGetStatus(path string) Response { + if s.directories[path] { + return Response{ + Body: &workspace.ObjectInfo{ + ObjectType: "DIRECTORY", + Path: path, + }, + } + } else if _, ok := s.files[path]; ok { + return Response{ + Body: &workspace.ObjectInfo{ + ObjectType: "FILE", + Path: path, + Language: "SCALA", + }, + } + } else { + return Response{ + StatusCode: 404, + Body: map[string]string{"message": "Workspace path not found"}, + } + } +} + +func (s *FakeWorkspace) WorkspaceMkdirs(request workspace.Mkdirs) { + s.directories[request.Path] = true +} + +func (s *FakeWorkspace) WorkspaceExport(path string) []byte { + return s.files[path] +} + +func (s *FakeWorkspace) WorkspaceDelete(path string, recursive bool) { + if !recursive { + s.files[path] = nil + } else { + for key := range s.files { + if strings.HasPrefix(key, path) { + s.files[key] = nil + } + } + } +} + +func (s *FakeWorkspace) WorkspaceFilesImportFile(path string, body []byte) { + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + s.files[path] = body +} + +func (s *FakeWorkspace) JobsCreate(request jobs.CreateJob) Response { + jobId := s.nextJobId + s.nextJobId++ + + jobSettings := jobs.JobSettings{} + err := jsonConvert(request, &jobSettings) + if err != nil { + return Response{ + StatusCode: 400, + Body: fmt.Sprintf("Cannot convert request to jobSettings: %s", err), + } + } + + s.jobs[jobId] = jobs.Job{ + JobId: jobId, + Settings: &jobSettings, + } + + return Response{ + Body: jobs.CreateResponse{JobId: jobId}, + } +} + +func (s *FakeWorkspace) JobsGet(jobId string) Response { + id := jobId + + jobIdInt, err := strconv.ParseInt(id, 10, 64) + if err != nil { + return Response{ + StatusCode: 400, + Body: fmt.Sprintf("Failed to parse job id: %s: %v", err, id), + } + } + + job, ok := s.jobs[jobIdInt] + if !ok { + return Response{ + StatusCode: 404, + } + } + + return Response{ + Body: job, + } +} + +func (s *FakeWorkspace) JobsList() Response { + list := make([]jobs.BaseJob, 0, len(s.jobs)) + for _, job := range s.jobs { + baseJob := jobs.BaseJob{} + err := jsonConvert(job, &baseJob) + if err != nil { + return Response{ + StatusCode: 400, + Body: fmt.Sprintf("failed to convert job to base job: %s", err), + } + } + + list = append(list, baseJob) + } + + // sort to have less non-determinism in tests + sort.Slice(list, func(i, j int) bool { + return list[i].JobId < list[j].JobId + }) + + return Response{ + Body: jobs.ListJobsResponse{ + Jobs: list, + }, + } +} + +// jsonConvert saves input to a value pointed by output +func jsonConvert(input, output any) error { + writer := new(bytes.Buffer) + encoder := json.NewEncoder(writer) + err := encoder.Encode(input) + if err != nil { + 
return fmt.Errorf("failed to encode: %w", err) + } + + decoder := json.NewDecoder(writer) + err = decoder.Decode(output) + if err != nil { + return fmt.Errorf("failed to decode: %w", err) + } + + return nil +} diff --git a/libs/testserver/server.go b/libs/testserver/server.go new file mode 100644 index 000000000..4aa2d2dc0 --- /dev/null +++ b/libs/testserver/server.go @@ -0,0 +1,297 @@ +package testserver + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "reflect" + "strings" + "sync" + + "github.com/gorilla/mux" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/databricks-sdk-go/apierr" +) + +type Server struct { + *httptest.Server + Router *mux.Router + + t testutil.TestingT + + fakeWorkspaces map[string]*FakeWorkspace + mu *sync.Mutex + + RecordRequestsCallback func(request *Request) +} + +type Request struct { + Method string + URL *url.URL + Headers http.Header + Body []byte + Vars map[string]string + Workspace *FakeWorkspace +} + +type Response struct { + StatusCode int + Headers http.Header + Body any +} + +type encodedResponse struct { + StatusCode int + Headers http.Header + Body []byte +} + +func NewRequest(t testutil.TestingT, r *http.Request, fakeWorkspace *FakeWorkspace) Request { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("Failed to read request body: %s", err) + } + + return Request{ + Method: r.Method, + URL: r.URL, + Headers: r.Header, + Body: body, + Vars: mux.Vars(r), + Workspace: fakeWorkspace, + } +} + +func normalizeResponse(t testutil.TestingT, resp any) encodedResponse { + result := normalizeResponseBody(t, resp) + if result.StatusCode == 0 { + result.StatusCode = 200 + } + return result +} + +func normalizeResponseBody(t testutil.TestingT, resp any) encodedResponse { + if isNil(resp) { + t.Errorf("Handler must not return nil") + return encodedResponse{StatusCode: 500} + } + + respBytes, ok := resp.([]byte) + if ok { + return encodedResponse{ + Body: respBytes, + Headers: getHeaders(respBytes), + } + } + + respString, ok := resp.(string) + if ok { + return encodedResponse{ + Body: []byte(respString), + Headers: getHeaders([]byte(respString)), + } + } + + respStruct, ok := resp.(Response) + if ok { + if isNil(respStruct.Body) { + return encodedResponse{ + StatusCode: respStruct.StatusCode, + Headers: respStruct.Headers, + Body: []byte{}, + } + } + + bytesVal, isBytes := respStruct.Body.([]byte) + if isBytes { + return encodedResponse{ + StatusCode: respStruct.StatusCode, + Headers: respStruct.Headers, + Body: bytesVal, + } + } + + stringVal, isString := respStruct.Body.(string) + if isString { + return encodedResponse{ + StatusCode: respStruct.StatusCode, + Headers: respStruct.Headers, + Body: []byte(stringVal), + } + } + + respBytes, err := json.MarshalIndent(respStruct.Body, "", " ") + if err != nil { + t.Errorf("JSON encoding error: %s", err) + return encodedResponse{ + StatusCode: 500, + Body: []byte("internal error"), + } + } + + headers := respStruct.Headers + if headers == nil { + headers = getJsonHeaders() + } + + return encodedResponse{ + StatusCode: respStruct.StatusCode, + Headers: headers, + Body: respBytes, + } + } + + respBytes, err := json.MarshalIndent(resp, "", " ") + if err != nil { + t.Errorf("JSON encoding error: %s", err) + return encodedResponse{ + StatusCode: 500, + Body: []byte("internal error"), + } + } + + return encodedResponse{ + Body: respBytes, + Headers: getJsonHeaders(), + } +} + +func getJsonHeaders() http.Header { + return map[string][]string{ + 
"Content-Type": {"application/json"}, + } +} + +func getHeaders(value []byte) http.Header { + if json.Valid(value) { + return getJsonHeaders() + } else { + return map[string][]string{ + "Content-Type": {"text/plain"}, + } + } +} + +func New(t testutil.TestingT) *Server { + router := mux.NewRouter() + server := httptest.NewServer(router) + t.Cleanup(server.Close) + + s := &Server{ + Server: server, + Router: router, + t: t, + mu: &sync.Mutex{}, + fakeWorkspaces: map[string]*FakeWorkspace{}, + } + + // Set up the not found handler as fallback + router.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + pattern := r.Method + " " + r.URL.Path + bodyBytes, err := io.ReadAll(r.Body) + var body string + if err != nil { + body = fmt.Sprintf("failed to read the body: %s", err) + } else { + body = fmt.Sprintf("[%d bytes] %s", len(bodyBytes), bodyBytes) + } + + t.Errorf(`No handler for URL: %s +Body: %s + +For acceptance tests, add this to test.toml: +[[Server]] +Pattern = %q +Response.Body = '' +# Response.StatusCode = +`, r.URL, body, pattern) + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotImplemented) + + resp := apierr.APIError{ + Message: "No stub found for pattern: " + pattern, + } + + respBytes, err := json.Marshal(resp) + if err != nil { + t.Errorf("JSON encoding error: %s", err) + respBytes = []byte("{\"message\": \"JSON encoding error\"}") + } + + if _, err := w.Write(respBytes); err != nil { + t.Errorf("Response write error: %s", err) + } + }) + + return s +} + +type HandlerFunc func(req Request) any + +func (s *Server) Handle(method, path string, handler HandlerFunc) { + s.Router.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) { + // For simplicity we process requests sequentially. It's fast enough because + // we don't do any IO except reading and writing request/response bodies. + s.mu.Lock() + defer s.mu.Unlock() + + // Each test uses unique DATABRICKS_TOKEN, we simulate each token having + // it's own fake fakeWorkspace to avoid interference between tests. + var fakeWorkspace *FakeWorkspace = nil + token := getToken(r) + if token != "" { + if _, ok := s.fakeWorkspaces[token]; !ok { + s.fakeWorkspaces[token] = NewFakeWorkspace() + } + + fakeWorkspace = s.fakeWorkspaces[token] + } + + request := NewRequest(s.t, r, fakeWorkspace) + if s.RecordRequestsCallback != nil { + s.RecordRequestsCallback(&request) + } + respAny := handler(request) + resp := normalizeResponse(s.t, respAny) + + for k, v := range resp.Headers { + w.Header()[k] = v + } + + w.WriteHeader(resp.StatusCode) + + if _, err := w.Write(resp.Body); err != nil { + s.t.Errorf("Failed to write response: %s", err) + return + } + }).Methods(method) +} + +func getToken(r *http.Request) string { + header := r.Header.Get("Authorization") + prefix := "Bearer " + + if !strings.HasPrefix(header, prefix) { + return "" + } + + return header[len(prefix):] +} + +func isNil(i any) bool { + if i == nil { + return true + } + v := reflect.ValueOf(i) + switch v.Kind() { + case reflect.Chan, reflect.Func, reflect.Map, reflect.Ptr, reflect.Interface, reflect.Slice: + return v.IsNil() + default: + return false + } +} diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 000000000..802a3ca67 --- /dev/null +++ b/ruff.toml @@ -0,0 +1 @@ +line-length = 150