diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 8622b29ca..dfe78790a 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -a6a317df8327c9b1e5cb59a03a42ffa2aabeef6d \ No newline at end of file +779817ed8d63031f5ea761fbd25ee84f38feec0d \ No newline at end of file diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index ee2c7b0fd..0c9fa089a 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -140,9 +140,9 @@ func new{{.PascalName}}() *cobra.Command { {{- end}} {{$method := .}} {{ if not .IsJsonOnly }} - {{range $request.Fields -}} + {{range .AllFields -}} {{- if not .Required -}} - {{if .Entity.IsObject }}// TODO: complex arg: {{.Name}} + {{if .Entity.IsObject}}{{if not (eq . $method.RequestBodyField) }}// TODO: complex arg: {{.Name}}{{end}} {{else if .Entity.IsAny }}// TODO: any: {{.Name}} {{else if .Entity.ArrayValue }}// TODO: array: {{.Name}} {{else if .Entity.MapValue }}// TODO: map via StringToStringVar: {{.Name}} diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..76835de7d --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @pietern @andrewnester @shreyas-goenka @denik diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f1b219b47..e7d7ad6b6 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,3 +4,7 @@ updates: directory: "/" schedule: interval: "weekly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml new file mode 100644 index 000000000..ea9558caf --- /dev/null +++ b/.github/workflows/close-stale-issues.yml @@ -0,0 +1,38 @@ +name: "Close Stale Issues" + +on: + workflow_dispatch: + schedule: + - cron: "0 0 * * *" # Run at midnight every day + +jobs: + cleanup: + name: Stale issue job + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + + permissions: + issues: write + contents: read + pull-requests: write + + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 + with: + stale-issue-message: This issue has not received a response in a while. If you want to keep this issue open, please leave a comment below and auto-close will be canceled. + stale-pr-message: This PR has not received an update in a while. If you want to keep this PR open, please leave a comment below or push a new commit and auto-close will be canceled. + + # These labels are required + stale-issue-label: Stale + stale-pr-label: Stale + + exempt-issue-labels: No Autoclose + exempt-pr-labels: No Autoclose + + # Issue timing + days-before-stale: 60 + days-before-close: 30 + + repo-token: ${{ secrets.GITHUB_TOKEN }} + loglevel: DEBUG diff --git a/.github/workflows/external-message.yml b/.github/workflows/external-message.yml index 1970735f9..108ca9162 100644 --- a/.github/workflows/external-message.yml +++ b/.github/workflows/external-message.yml @@ -13,12 +13,19 @@ on: jobs: comment-on-pr: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + permissions: pull-requests: write + # Only run this job for PRs from forks. + # Integration tests are not run automatically for PRs from forks. 
+ if: "${{ github.event.pull_request.head.repo.fork }}" + steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Delete old comments env: @@ -43,7 +50,7 @@ jobs: run: | gh pr comment ${{ github.event.pull_request.number }} --body \ " - If integration tests don't run automatically, an authorized user can run them manually by following the instructions below: + An authorized user can trigger integration tests manually by following the instructions below: Trigger: [go/deco-tests-run/cli](https://go/deco-tests-run/cli) diff --git a/.github/workflows/integration-approve.yml b/.github/workflows/integration-approve.yml index 4bdeb62a3..293d31a2a 100644 --- a/.github/workflows/integration-approve.yml +++ b/.github/workflows/integration-approve.yml @@ -17,7 +17,9 @@ jobs: # * Avoid running integration tests twice, since it was already run at the tip of the branch before squashing. # trigger: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco steps: - name: Auto-approve squashed commit diff --git a/.github/workflows/integration-main.yml b/.github/workflows/integration-main.yml index 064e439cf..84dd7263a 100644 --- a/.github/workflows/integration-main.yml +++ b/.github/workflows/integration-main.yml @@ -11,13 +11,16 @@ jobs: # This workflow triggers the integration test workflow in a different repository. # It requires secrets from the "test-trigger-is" environment, which are only available to authorized users. trigger: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + environment: "test-trigger-is" steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/integration-pr.yml b/.github/workflows/integration-pr.yml index bf2dcd8bc..7a62113cd 100644 --- a/.github/workflows/integration-pr.yml +++ b/.github/workflows/integration-pr.yml @@ -5,41 +5,25 @@ on: types: [opened, synchronize] jobs: - check-token: - runs-on: ubuntu-latest - environment: "test-trigger-is" - - outputs: - has_token: ${{ steps.set-token-status.outputs.has_token }} - - steps: - - name: Check if DECO_WORKFLOW_TRIGGER_APP_ID is set - id: set-token-status - run: | - if [ -z "${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }}" ]; then - echo "DECO_WORKFLOW_TRIGGER_APP_ID is empty. User has no access to secrets." - echo "::set-output name=has_token::false" - else - echo "DECO_WORKFLOW_TRIGGER_APP_ID is set. User has access to secrets." - echo "::set-output name=has_token::true" - fi - # Trigger for pull requests. # # This workflow triggers the integration test workflow in a different repository. # It requires secrets from the "test-trigger-is" environment, which are only available to authorized users. - # It depends on the "check-token" workflow to confirm access to this environment to avoid failures. trigger: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + environment: "test-trigger-is" - if: needs.check-token.outputs.has_token == 'true' - needs: check-token + # Only run this job for PRs from branches on the main repository and not from forks. 
+ # Workflows triggered by PRs from forks don't have access to the "test-trigger-is" environment. + if: "${{ !github.event.pull_request.head.repo.fork }}" steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/publish-winget.yml b/.github/workflows/publish-winget.yml index 19603e669..eb9a72eda 100644 --- a/.github/workflows/publish-winget.yml +++ b/.github/workflows/publish-winget.yml @@ -2,15 +2,27 @@ name: publish-winget on: workflow_dispatch: + inputs: + tag: + description: 'Tag to publish' + default: '' jobs: publish-to-winget-pkgs: - runs-on: windows-latest + runs-on: + group: databricks-protected-runner-group + labels: windows-server-latest + environment: release + steps: - - uses: vedantmgoyal2009/winget-releaser@93fd8b606a1672ec3e5c6c3bb19426be68d1a8b0 # https://github.com/vedantmgoyal2009/winget-releaser/releases/tag/v2 + - uses: vedantmgoyal2009/winget-releaser@93fd8b606a1672ec3e5c6c3bb19426be68d1a8b0 # v2 with: identifier: Databricks.DatabricksCLI installers-regex: 'windows_.*-signed\.zip$' # Only signed Windows releases token: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }} fork-user: eng-dev-ecosystem-bot + + # Use the tag from the input, or the ref name if the input is not provided. + # The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command. + release-tag: ${{ inputs.tag || github.ref_name }} diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index a51927594..c3a314d69 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -13,9 +13,26 @@ on: # seed the build cache. 
branches: - main + schedule: + - cron: '0 0,12 * * *' # Runs at 00:00 and 12:00 UTC daily + +env: + GOTESTSUM_FORMAT: github-actions jobs: + cleanups: + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + steps: + - name: Clean up cache if running on schedule + if: ${{ github.event_name == 'schedule' }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh cache delete --all --repo databricks/cli || true + tests: + needs: cleanups runs-on: ${{ matrix.os }} strategy: @@ -28,20 +45,26 @@ jobs: steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.9' - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0 + + - name: Run ruff + uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1 + with: + version: "0.9.1" + args: "format --check" - name: Set go env run: | @@ -54,17 +77,22 @@ jobs: make vendor pip3 install wheel - - name: Run tests - run: make testonly + - name: Run tests with coverage + run: make cover golangci: + needs: cleanups name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 + # Use different schema from regular job, to avoid overwriting the same key + cache-dependency-path: | + go.sum + .golangci.yaml - name: Run go mod tidy run: | go mod tidy @@ -73,22 +101,34 @@ jobs: # Exit with status code 1 if there are differences (i.e. unformatted files) git diff --exit-code - name: golangci-lint - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: - version: v1.62.2 + version: v1.63.4 args: --timeout=15m validate-bundle-schema: + needs: cleanups runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 + # Use different schema from regular job, to avoid overwriting the same key + cache-dependency-path: | + go.sum + bundle/internal/schema/*.* + + - name: Verify that the schema is up to date + run: | + if ! ( make schema && git diff --exit-code ); then + echo "The schema is not up to date. Please run 'make schema' and commit the changes." 
+ exit 1 + fi # Github repo: https://github.com/ajv-validator/ajv-cli - name: Install ajv-cli diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index 7ef8b43c9..548d93e90 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -20,16 +20,19 @@ on: jobs: goreleaser: - runs-on: ubuntu-latest + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 fetch-tags: true - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 @@ -45,27 +48,27 @@ jobs: - name: Run GoReleaser id: releaser - uses: goreleaser/goreleaser-action@v6 + uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0 with: version: ~> v2 args: release --snapshot --skip docker - name: Upload macOS binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_darwin_snapshot path: | dist/*_darwin_*/ - name: Upload Linux binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_linux_snapshot path: | dist/*_linux_*/ - name: Upload Windows binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_windows_snapshot path: | @@ -85,7 +88,7 @@ jobs: # Snapshot release may only be updated for commits to the main branch. if: github.ref == 'refs/heads/main' - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1 with: name: Snapshot prerelease: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e4a253531..5d5811b19 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,18 +9,22 @@ on: jobs: goreleaser: + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + outputs: artifacts: ${{ steps.releaser.outputs.artifacts }} - runs-on: ubuntu-latest + steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 fetch-tags: true - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 @@ -33,7 +37,7 @@ jobs: # Log into the GitHub Container Registry. The goreleaser action will create # the docker images and push them to the GitHub Container Registry. - - uses: "docker/login-action@v3" + - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 with: registry: "ghcr.io" username: "${{ github.actor }}" @@ -42,11 +46,11 @@ jobs: # QEMU is required to build cross platform docker images using buildx. # It allows virtualization of the CPU architecture at the application level. 
- name: Set up QEMU dependency - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@53851d14592bedcffcf25ea515637cff71ef929a # v3.3.0 - name: Run GoReleaser id: releaser - uses: goreleaser/goreleaser-action@v6 + uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0 with: version: ~> v2 args: release @@ -54,8 +58,12 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} create-setup-cli-release-pr: + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + needs: goreleaser - runs-on: ubuntu-latest + steps: - name: Set VERSION variable from tag run: | @@ -63,7 +71,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update setup-cli - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | @@ -78,8 +86,12 @@ jobs: }); create-homebrew-tap-release-pr: + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + needs: goreleaser - runs-on: ubuntu-latest + steps: - name: Set VERSION variable from tag run: | @@ -87,7 +99,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update homebrew-tap - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | @@ -115,8 +127,12 @@ jobs: }); create-vscode-extension-update-pr: + runs-on: + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco + needs: goreleaser - runs-on: ubuntu-latest + steps: - name: Set VERSION variable from tag run: | @@ -124,7 +140,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update CLI version in the VSCode extension - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | diff --git a/.gitignore b/.gitignore index edd1409ae..2060b6bac 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ dist/ *.log coverage.txt +coverage-acceptance.txt __pycache__ *.pyc @@ -31,3 +32,4 @@ __pycache__ .vscode/tasks.json .databricks +.ruff_cache diff --git a/.golangci.yaml b/.golangci.yaml index 9e69e5146..8a83135ee 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -11,12 +11,24 @@ linters: - gofmt - gofumpt - goimports + - testifylint + - intrange + - mirror + - perfsprint + - unconvert linters-settings: govet: enable-all: true disable: - fieldalignment - shadow + settings: + printf: + funcs: + - (github.com/databricks/cli/internal/testutil.TestingT).Infof + - (github.com/databricks/cli/internal/testutil.TestingT).Errorf + - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf + - (github.com/databricks/cli/internal/testutil.TestingT).Skipf gofmt: rewrite-rules: - pattern: 'a[b:len(a)]' @@ -32,7 +44,14 @@ linters-settings: gofumpt: module-path: github.com/databricks/cli extra-rules: true - #goimports: - # local-prefixes: github.com/databricks/cli + testifylint: + enable-all: true + disable: + # good check, but we have too many assert.(No)?Errorf? 
so excluding for now + - require-error + copyloopvar: + check-alias: true issues: exclude-dirs-use-default: false # recommended by docs https://golangci-lint.run/usage/false-positives/ + max-issues-per-linter: 1000 + max-same-issues: 1000 diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bdb0795b..255bfb0a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,78 @@ # Version changelog +## [Release] Release v0.239.1 + +CLI: + * Added text output templates for apps list and list-deployments ([#2175](https://github.com/databricks/cli/pull/2175)). + * Fix duplicate "apps" entry in help output ([#2191](https://github.com/databricks/cli/pull/2191)). + +Bundles: + * Allow yaml-anchors in schema ([#2200](https://github.com/databricks/cli/pull/2200)). + * Show an error when non-yaml files used in include section ([#2201](https://github.com/databricks/cli/pull/2201)). + * Set WorktreeRoot to sync root outside git repo ([#2197](https://github.com/databricks/cli/pull/2197)). + * fix: Detailed message for using source-linked deployment with file_path specified ([#2119](https://github.com/databricks/cli/pull/2119)). + * Allow using variables in enum fields ([#2199](https://github.com/databricks/cli/pull/2199)). + * Add experimental-jobs-as-code template ([#2177](https://github.com/databricks/cli/pull/2177)). + * Reading variables from file ([#2171](https://github.com/databricks/cli/pull/2171)). + * Fixed an apps message order and added output test ([#2174](https://github.com/databricks/cli/pull/2174)). + * Default to forward slash-separated paths for path translation ([#2145](https://github.com/databricks/cli/pull/2145)). + * Include a materialized copy of built-in templates ([#2146](https://github.com/databricks/cli/pull/2146)). + + + +## [Release] Release v0.239.0 + +### New feature announcement + +#### Databricks Apps support + +You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration. +For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app + +#### Referencing complex variables in complex variables + +You can now reference complex variables within other complex variables. +For more details see https://github.com/databricks/cli/pull/2157 + +CLI: + * Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)). + * Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)). + +Bundles: + * Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)). + * Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)). + * Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)). + * Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)). + * Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)). + * Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)). + * Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)). + * Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)). 
+ * Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)). + * Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)). + +API Changes: + * Changed `databricks account federation-policy update` command with new required argument order. + * Changed `databricks account service-principal-federation-policy update` command with new required argument order. + +OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08) +Dependency updates: + * Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)). + * Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)). + * Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)). + * Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)). + * Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)). + * Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)). + * Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)). + +## [Release] Release v0.238.0 + +Bundles: + * Fix finding Python within virtualenv on Windows ([#2034](https://github.com/databricks/cli/pull/2034)). + * Include missing field descriptions in JSON schema ([#2045](https://github.com/databricks/cli/pull/2045)). + * Add validation for volume referenced from `artifact_path` ([#2050](https://github.com/databricks/cli/pull/2050)). + * Handle `${workspace.file_path}` references in source-linked deployments ([#2046](https://github.com/databricks/cli/pull/2046)). + * Set the write bit for files written during template initialization ([#2068](https://github.com/databricks/cli/pull/2068)). + ## [Release] Release v0.237.0 Bundles: diff --git a/Makefile b/Makefile index f8e7834a5..00dadcb0c 100644 --- a/Makefile +++ b/Makefile @@ -1,38 +1,52 @@ -default: build +default: vendor fmt lint -lint: vendor - @echo "✓ Linting source code with https://golangci-lint.run/ (with --fix)..." - @golangci-lint run --fix ./... +PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... . -lintcheck: vendor - @echo "✓ Linting source code with https://golangci-lint.run/ ..." - @golangci-lint run ./... +GOTESTSUM_FORMAT ?= pkgname-and-test-fails +GOTESTSUM_CMD ?= gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -test: lint testonly -testonly: - @echo "✓ Running tests ..." - @gotestsum --format pkgname-and-test-fails --no-summary=skipped --raw-command go test -v -json -short -coverprofile=coverage.txt ./... +lint: + golangci-lint run --fix -coverage: test - @echo "✓ Opening coverage for unit tests ..." - @go tool cover -html=coverage.txt +lintcheck: + golangci-lint run ./... + +# Note 'make lint' will do formatting as well. However, if there are compilation errors, +# formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'. +# If you need to ensure that formatting & imports are always fixed, do "make fmt lint" +fmt: + ruff format -q + golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./... 
+
+test:
+	${GOTESTSUM_CMD} -- ${PACKAGES}
+
+cover:
+	rm -fr ./acceptance/build/cover/
+	CLI_GOCOVERDIR=build/cover ${GOTESTSUM_CMD} -- -coverprofile=coverage.txt ${PACKAGES}
+	rm -fr ./acceptance/build/cover-merged/
+	mkdir -p acceptance/build/cover-merged/
+	go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/
+	go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt
+
+showcover:
+	go tool cover -html=coverage.txt
+
+acc-showcover:
+	go tool cover -html=coverage-acceptance.txt
 
 build: vendor
-	@echo "✓ Building source code with go build ..."
-	@go build -mod vendor
+	go build -mod vendor
 
 snapshot:
-	@echo "✓ Building dev snapshot"
-	@go build -o .databricks/databricks
+	go build -o .databricks/databricks
 
 vendor:
-	@echo "✓ Filling vendor folder with library code ..."
-	@go mod vendor
-
+	go mod vendor
+
 schema:
-	@echo "✓ Generating json-schema ..."
-	@go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json
+	go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json
 
 INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." -- -parallel 4 -timeout=2h
 
@@ -42,4 +56,4 @@ integration:
 integration-short:
 	$(INTEGRATION) -short
 
-.PHONY: lint lintcheck test testonly coverage build snapshot vendor schema integration integration-short
+.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover
diff --git a/NOTICE b/NOTICE
index f6b59e0b0..ed22084cf 100644
--- a/NOTICE
+++ b/NOTICE
@@ -105,3 +105,7 @@ License - https://github.com/wI2L/jsondiff/blob/master/LICENSE
 https://github.com/hexops/gotextdiff
 Copyright (c) 2009 The Go Authors. All rights reserved.
 License - https://github.com/hexops/gotextdiff/blob/main/LICENSE
+
+https://github.com/BurntSushi/toml
+Copyright (c) 2013 TOML authors
+https://github.com/BurntSushi/toml/blob/master/COPYING
diff --git a/acceptance/README.md b/acceptance/README.md
new file mode 100644
index 000000000..42a37d253
--- /dev/null
+++ b/acceptance/README.md
@@ -0,0 +1,19 @@
+Acceptance tests are blackbox tests that are run against the compiled binary.
+
+Currently these tests run against a "fake" HTTP server that pretends to be the Databricks API. However, they will be extended to run against a real environment as regular integration tests.
+
+To author a test:
+ - Add a new directory under `acceptance`. Any level of nesting is supported.
+ - Add `databricks.yml` there.
+ - Add `script` with the commands to run, e.g. `$CLI bundle validate`. A test case is recognized by the presence of `script`.
+
+The test runner runs `script`, captures its output, and compares it with the `output.txt` file in the same directory.
+
+To write `output.txt` for the first time, or to overwrite it with the current output, pass the -update flag to go test.
+
+The scripts are run with `bash -e`, so any error is propagated. A non-zero exit code is captured in `output.txt` by appending an `Exit code: N` line at the end.
+
+For more complex tests one can also use:
+- the `errcode` helper: if the command fails with a non-zero code, it appends `Exit code: N` to the output but returns success to the caller (bash), allowing the script to continue.
+- the `trace` helper: prints the arguments before executing the command.
+- custom output files: redirect output to a custom file (its name must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`.
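To make the test-authoring flow described in `acceptance/README.md` above concrete, here is a minimal sketch of a hypothetical test case. The directory name `acceptance/bundle/my-test`, the second command, and the expected output are illustrative only and are not part of this change.

```bash
# Hypothetical layout (illustrative, not from this PR):
#   acceptance/bundle/my-test/databricks.yml   # minimal bundle configuration
#   acceptance/bundle/my-test/script           # the commands below, run with `bash -e`
#   acceptance/bundle/my-test/output.txt       # expected output, written via -update

# Contents of `script`. The `trace` helper prints the command before running it;
# `errcode` records a non-zero exit code in the output but lets the script continue.
trace $CLI bundle validate
errcode $CLI bundle validate -t nonexistent-target

# To (re)generate output.txt, pass -update to go test, e.g.:
#   go test ./acceptance -run 'TestAccept/bundle/my-test' -update
```

On the first run with `-update` the runner writes `output.txt`; subsequent runs compare the captured output against it and fail on any difference.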
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go new file mode 100644 index 000000000..a1c41c5e6 --- /dev/null +++ b/acceptance/acceptance_test.go @@ -0,0 +1,481 @@ +package acceptance_test + +import ( + "context" + "errors" + "flag" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "slices" + "sort" + "strings" + "testing" + "time" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/databricks-sdk-go" + "github.com/stretchr/testify/require" +) + +var KeepTmp bool + +// In order to debug CLI running under acceptance test, set this to full subtest name, e.g. "bundle/variables/empty" +// Then install your breakpoints and click "debug test" near TestAccept in VSCODE. +// example: var SingleTest = "bundle/variables/empty" +var SingleTest = "" + +// If enabled, instead of compiling and running CLI externally, we'll start in-process server that accepts and runs +// CLI commands. The $CLI in test scripts is a helper that just forwards command-line arguments to this server (see bin/callserver.py). +// Also disables parallelism in tests. +var InprocessMode bool + +func init() { + flag.BoolVar(&InprocessMode, "inprocess", SingleTest != "", "Run CLI in the same process as test (for debugging)") + flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run") +} + +const ( + EntryPointScript = "script" + CleanupScript = "script.cleanup" + PrepareScript = "script.prepare" +) + +var Scripts = map[string]bool{ + EntryPointScript: true, + CleanupScript: true, + PrepareScript: true, +} + +func TestAccept(t *testing.T) { + testAccept(t, InprocessMode, SingleTest) +} + +func TestInprocessMode(t *testing.T) { + if InprocessMode { + t.Skip("Already tested by TestAccept") + } + if runtime.GOOS == "windows" { + // - catalogs A catalog is the first layer of Unity Catalog’s three-level namespace. + // + catalogs A catalog is the first layer of Unity Catalog�s three-level namespace. 
+ t.Skip("Fails on CI on unicode characters") + } + require.NotZero(t, testAccept(t, true, "help")) +} + +func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { + repls := testdiff.ReplacementsContext{} + cwd, err := os.Getwd() + require.NoError(t, err) + + coverDir := os.Getenv("CLI_GOCOVERDIR") + + if coverDir != "" { + require.NoError(t, os.MkdirAll(coverDir, os.ModePerm)) + coverDir, err = filepath.Abs(coverDir) + require.NoError(t, err) + t.Logf("Writing coverage to %s", coverDir) + } + + execPath := "" + + if InprocessMode { + cmdServer := StartCmdServer(t) + t.Setenv("CMD_SERVER_URL", cmdServer.URL) + execPath = filepath.Join(cwd, "bin", "callserver.py") + } else { + execPath = BuildCLI(t, cwd, coverDir) + } + + t.Setenv("CLI", execPath) + repls.SetPath(execPath, "$CLI") + + // Make helper scripts available + t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH"))) + + tempHomeDir := t.TempDir() + repls.SetPath(tempHomeDir, "$TMPHOME") + t.Logf("$TMPHOME=%v", tempHomeDir) + + // Prevent CLI from downloading terraform in each test: + t.Setenv("DATABRICKS_TF_EXEC_PATH", tempHomeDir) + + ctx := context.Background() + cloudEnv := os.Getenv("CLOUD_ENV") + + if cloudEnv == "" { + server := StartServer(t) + AddHandlers(server) + // Redirect API access to local server: + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "dapi1234") + + homeDir := t.TempDir() + // Do not read user's ~/.databrickscfg + t.Setenv(env.HomeEnvVar(), homeDir) + } + + workspaceClient, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + user, err := workspaceClient.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, &repls, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient) + testdiff.PrepareReplacementsUUID(t, &repls) + + testDirs := getTests(t) + require.NotEmpty(t, testDirs) + + if singleTest != "" { + testDirs = slices.DeleteFunc(testDirs, func(n string) bool { + return n != singleTest + }) + require.NotEmpty(t, testDirs, "singleTest=%#v did not match any tests\n%#v", singleTest, testDirs) + } + + for _, dir := range testDirs { + testName := strings.ReplaceAll(dir, "\\", "/") + t.Run(testName, func(t *testing.T) { + if !InprocessMode { + t.Parallel() + } + + runTest(t, dir, coverDir, repls.Clone()) + }) + } + + return len(testDirs) +} + +func getTests(t *testing.T) []string { + testDirs := make([]string, 0, 128) + + err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + name := filepath.Base(path) + if name == EntryPointScript { + // Presence of 'script' marks a test case in this directory + testDirs = append(testDirs, filepath.Dir(path)) + } + return nil + }) + require.NoError(t, err) + + sort.Strings(testDirs) + return testDirs +} + +func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) { + config, configPath := LoadConfig(t, dir) + + isEnabled, isPresent := config.GOOS[runtime.GOOS] + if isPresent && !isEnabled { + t.Skipf("Disabled via GOOS.%s setting in %s", runtime.GOOS, configPath) + } + + var tmpDir string + var err error + if KeepTmp { + tempDirBase := filepath.Join(os.TempDir(), "acceptance") + _ = os.Mkdir(tempDirBase, 0o755) + tmpDir, err = os.MkdirTemp(tempDirBase, "") + require.NoError(t, err) + t.Logf("Created directory: %s", tmpDir) + } else { + tmpDir = t.TempDir() + } + + repls.SetPathWithParents(tmpDir, 
"$TMPDIR") + + scriptContents := readMergedScriptContents(t, dir) + testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents) + + inputs := make(map[string]bool, 2) + outputs := make(map[string]bool, 2) + err = CopyDir(dir, tmpDir, inputs, outputs) + require.NoError(t, err) + + args := []string{"bash", "-euo", "pipefail", EntryPointScript} + cmd := exec.Command(args[0], args[1:]...) + if coverDir != "" { + // Creating individual coverage directory for each test, because writing to the same one + // results in sporadic failures like this one (only if tests are running in parallel): + // +error: coverage meta-data emit failed: writing ... rename .../tmp.covmeta.b3f... .../covmeta.b3f2c...: no such file or directory + coverDir = filepath.Join(coverDir, strings.ReplaceAll(dir, string(os.PathSeparator), "--")) + err := os.MkdirAll(coverDir, os.ModePerm) + require.NoError(t, err) + cmd.Env = append(os.Environ(), "GOCOVERDIR="+coverDir) + } + + // Write combined output to a file + out, err := os.Create(filepath.Join(tmpDir, "output.txt")) + require.NoError(t, err) + cmd.Stdout = out + cmd.Stderr = out + cmd.Dir = tmpDir + err = cmd.Run() + + // Include exit code in output (if non-zero) + formatOutput(out, err) + require.NoError(t, out.Close()) + + // Compare expected outputs + for relPath := range outputs { + doComparison(t, repls, dir, tmpDir, relPath) + } + + // Make sure there are not unaccounted for new files + files := ListDir(t, tmpDir) + for _, relPath := range files { + if _, ok := inputs[relPath]; ok { + continue + } + if _, ok := outputs[relPath]; ok { + continue + } + if strings.HasPrefix(relPath, "out") { + // We have a new file starting with "out" + // Show the contents & support overwrite mode for it: + doComparison(t, repls, dir, tmpDir, relPath) + } + } +} + +func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string) { + pathRef := filepath.Join(dirRef, relPath) + pathNew := filepath.Join(dirNew, relPath) + bufRef, okRef := readIfExists(t, pathRef) + bufNew, okNew := readIfExists(t, pathNew) + if !okRef && !okNew { + t.Errorf("Both files are missing: %s, %s", pathRef, pathNew) + return + } + + valueRef := testdiff.NormalizeNewlines(string(bufRef)) + valueNew := testdiff.NormalizeNewlines(string(bufNew)) + + // Apply replacements to the new value only. + // The reference value is stored after applying replacements. + valueNew = repls.Replace(valueNew) + + // The test did not produce an expected output file. + if okRef && !okNew { + t.Errorf("Missing output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Removing output file: %s", relPath) + require.NoError(t, os.Remove(pathRef)) + } + return + } + + // The test produced an unexpected output file. + if !okRef && okNew { + t.Errorf("Unexpected output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Writing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + return + } + + // Compare the reference and new values. + equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if !equal && testdiff.OverwriteMode { + t.Logf("Overwriting existing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } +} + +// Returns combined script.prepare (root) + script.prepare (parent) + ... + script + ... + script.cleanup (parent) + ... 
+// Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir. +func readMergedScriptContents(t *testing.T, dir string) string { + scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript)) + + // Wrap script contents in a subshell such that changing the working + // directory only affects the main script and not cleanup. + scriptContents = "(\n" + scriptContents + ")\n" + + prepares := []string{} + cleanups := []string{} + + for { + x, ok := readIfExists(t, filepath.Join(dir, CleanupScript)) + if ok { + cleanups = append(cleanups, string(x)) + } + + x, ok = readIfExists(t, filepath.Join(dir, PrepareScript)) + if ok { + prepares = append(prepares, string(x)) + } + + if dir == "" || dir == "." { + break + } + + dir = filepath.Dir(dir) + require.True(t, filepath.IsLocal(dir)) + } + + slices.Reverse(prepares) + prepares = append(prepares, scriptContents) + prepares = append(prepares, cleanups...) + return strings.Join(prepares, "\n") +} + +func BuildCLI(t *testing.T, cwd, coverDir string) string { + execPath := filepath.Join(cwd, "build", "databricks") + if runtime.GOOS == "windows" { + execPath += ".exe" + } + + start := time.Now() + args := []string{ + "go", "build", + "-mod", "vendor", + "-o", execPath, + } + + if coverDir != "" { + args = append(args, "-cover") + } + + if runtime.GOOS == "windows" { + // Get this error on my local Windows: + // error obtaining VCS status: exit status 128 + // Use -buildvcs=false to disable VCS stamping. + args = append(args, "-buildvcs=false") + } + + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = ".." + out, err := cmd.CombinedOutput() + elapsed := time.Since(start) + t.Logf("%s took %s", args, elapsed) + require.NoError(t, err, "go build failed: %s: %s\n%s", args, err, out) + if len(out) > 0 { + t.Logf("go build output: %s: %s", args, out) + } + + // Quick check + warm up cache: + cmd = exec.Command(execPath, "--version") + out, err = cmd.CombinedOutput() + require.NoError(t, err, "%s --version failed: %s\n%s", execPath, err, out) + return execPath +} + +func copyFile(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + + _, err = io.Copy(out, in) + return err +} + +func formatOutput(w io.Writer, err error) { + if err == nil { + return + } + if exiterr, ok := err.(*exec.ExitError); ok { + exitCode := exiterr.ExitCode() + fmt.Fprintf(w, "\nExit code: %d\n", exitCode) + } else { + fmt.Fprintf(w, "\nError: %s\n", err) + } +} + +func readIfExists(t *testing.T, path string) ([]byte, bool) { + data, err := os.ReadFile(path) + if err == nil { + return data, true + } + + if !errors.Is(err, os.ErrNotExist) { + t.Fatalf("%s: %s", path, err) + } + return []byte{}, false +} + +func CopyDir(src, dst string, inputs, outputs map[string]bool) error { + return filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + name := info.Name() + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + if strings.HasPrefix(relPath, "out") { + if !info.IsDir() { + outputs[relPath] = true + } + return nil + } else { + inputs[relPath] = true + } + + if _, ok := Scripts[name]; ok { + return nil + } + + destPath := filepath.Join(dst, relPath) + + if info.IsDir() { + return os.MkdirAll(destPath, info.Mode()) + } + + return copyFile(path, destPath) + }) +} + +func ListDir(t *testing.T, src string) 
[]string { + var files []string + err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + // Do not FailNow here. + // The output comparison is happening after this call which includes output.txt which + // includes errors printed by commands which include explanation why a given file cannot be read. + t.Errorf("Error when listing %s: path=%s: %s", src, path, err) + return nil + } + + if info.IsDir() { + return nil + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + if err != nil { + t.Errorf("Failed to list %s: %s", src, err) + } + return files +} diff --git a/acceptance/bin/callserver.py b/acceptance/bin/callserver.py new file mode 100755 index 000000000..294ef8fdb --- /dev/null +++ b/acceptance/bin/callserver.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import sys +import os +import json +import urllib.request +from urllib.parse import urlencode + +env = {} +for key, value in os.environ.items(): + if len(value) > 10_000: + sys.stderr.write(f"Dropping key={key} value len={len(value)}\n") + continue + env[key] = value + +q = { + "args": " ".join(sys.argv[1:]), + "cwd": os.getcwd(), + "env": json.dumps(env), +} + +url = os.environ["CMD_SERVER_URL"] + "/?" + urlencode(q) +if len(url) > 100_000: + sys.exit("url too large") + +resp = urllib.request.urlopen(url) +assert resp.status == 200, (resp.status, resp.url, resp.headers) +result = json.load(resp) +sys.stderr.write(result["stderr"]) +sys.stdout.write(result["stdout"]) +exitcode = int(result["exitcode"]) +sys.exit(exitcode) diff --git a/acceptance/bin/sort_blocks.py b/acceptance/bin/sort_blocks.py new file mode 100755 index 000000000..d558f252a --- /dev/null +++ b/acceptance/bin/sort_blocks.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +""" +Helper to sort blocks in text file. A block is a set of lines separated from others by empty line. + +This is to workaround non-determinism in the output. +""" + +import sys + +blocks = [] + +for line in sys.stdin: + if not line.strip(): + if blocks and blocks[-1]: + blocks.append("") + continue + if not blocks: + blocks.append("") + blocks[-1] += line + +blocks.sort() +print("\n".join(blocks)) diff --git a/acceptance/build/.gitignore b/acceptance/build/.gitignore new file mode 100644 index 000000000..a48b4db25 --- /dev/null +++ b/acceptance/build/.gitignore @@ -0,0 +1 @@ +databricks diff --git a/acceptance/bundle/help/bundle-deploy/output.txt b/acceptance/bundle/help/bundle-deploy/output.txt new file mode 100644 index 000000000..13c903f3e --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle deploy --help +Deploy bundle + +Usage: + databricks bundle deploy [flags] + +Flags: + --auto-approve Skip interactive approvals that might be required for deployment. + -c, --cluster-id string Override cluster in the deployment with the given cluster ID. + --fail-on-active-runs Fail if there are running jobs or pipelines in the deployment. + --force Force-override Git branch validation. + --force-lock Force acquisition of deployment lock. + -h, --help help for deploy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-deploy/script b/acceptance/bundle/help/bundle-deploy/script new file mode 100644 index 000000000..6375cfea3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/script @@ -0,0 +1 @@ +trace $CLI bundle deploy --help diff --git a/acceptance/bundle/help/bundle-deployment/output.txt b/acceptance/bundle/help/bundle-deployment/output.txt new file mode 100644 index 000000000..ddf5b3305 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/output.txt @@ -0,0 +1,22 @@ + +>>> $CLI bundle deployment --help +Deployment related commands + +Usage: + databricks bundle deployment [command] + +Available Commands: + bind Bind bundle-defined resources to existing resources + unbind Unbind bundle-defined resources from its managed remote resource + +Flags: + -h, --help help for deployment + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle deployment [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-deployment/script b/acceptance/bundle/help/bundle-deployment/script new file mode 100644 index 000000000..ef93f7dc2 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/script @@ -0,0 +1 @@ +trace $CLI bundle deployment --help diff --git a/acceptance/bundle/help/bundle-destroy/output.txt b/acceptance/bundle/help/bundle-destroy/output.txt new file mode 100644 index 000000000..d70164301 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle destroy --help +Destroy deployed bundle resources + +Usage: + databricks bundle destroy [flags] + +Flags: + --auto-approve Skip interactive approvals for deleting resources and files + --force-lock Force acquisition of deployment lock. + -h, --help help for destroy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-destroy/script b/acceptance/bundle/help/bundle-destroy/script new file mode 100644 index 000000000..955d7b7f9 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/script @@ -0,0 +1 @@ +trace $CLI bundle destroy --help diff --git a/acceptance/bundle/help/bundle-generate-dashboard/output.txt b/acceptance/bundle/help/bundle-generate-dashboard/output.txt new file mode 100644 index 000000000..a63ce0ff8 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/output.txt @@ -0,0 +1,24 @@ + +>>> $CLI bundle generate dashboard --help +Generate configuration for a dashboard + +Usage: + databricks bundle generate dashboard [flags] + +Flags: + -s, --dashboard-dir string directory to write the dashboard representation to (default "src") + --existing-id string ID of the dashboard to generate configuration for + --existing-path string workspace path of the dashboard to generate configuration for + -f, --force force overwrite existing files in the output directory + -h, --help help for dashboard + --resource string resource key of dashboard to watch for changes + -d, --resource-dir string directory to write the configuration to (default "resources") + --watch watch for changes to the dashboard and update the configuration + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-dashboard/script b/acceptance/bundle/help/bundle-generate-dashboard/script new file mode 100644 index 000000000..320156129 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/script @@ -0,0 +1 @@ +trace $CLI bundle generate dashboard --help diff --git a/acceptance/bundle/help/bundle-generate-job/output.txt b/acceptance/bundle/help/bundle-generate-job/output.txt new file mode 100644 index 000000000..adc3f45ae --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate job --help +Generate bundle configuration for a job + +Usage: + databricks bundle generate job [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-job-id int Job ID of the job to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for job + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-job/script b/acceptance/bundle/help/bundle-generate-job/script new file mode 100644 index 000000000..109ed59aa --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/script @@ -0,0 +1 @@ +trace $CLI bundle generate job --help diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt new file mode 100644 index 000000000..cf5f70920 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate pipeline --help +Generate bundle configuration for a pipeline + +Usage: + databricks bundle generate pipeline [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-pipeline-id string ID of the pipeline to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for pipeline + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-pipeline/script b/acceptance/bundle/help/bundle-generate-pipeline/script new file mode 100644 index 000000000..c6af62d0a --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/script @@ -0,0 +1 @@ +trace $CLI bundle generate pipeline --help diff --git a/acceptance/bundle/help/bundle-generate/output.txt b/acceptance/bundle/help/bundle-generate/output.txt new file mode 100644 index 000000000..1d77dfdbd --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/output.txt @@ -0,0 +1,25 @@ + +>>> $CLI bundle generate --help +Generate bundle configuration + +Usage: + databricks bundle generate [command] + +Available Commands: + app Generate bundle configuration for a Databricks app + dashboard Generate configuration for a dashboard + job Generate bundle configuration for a job + pipeline Generate bundle configuration for a pipeline + +Flags: + -h, --help help for generate + --key string resource key to use for the generated configuration + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle generate [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-generate/script b/acceptance/bundle/help/bundle-generate/script new file mode 100644 index 000000000..932588768 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/script @@ -0,0 +1 @@ +trace $CLI bundle generate --help diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt new file mode 100644 index 000000000..bafe5a187 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -0,0 +1,31 @@ + +>>> $CLI bundle init --help +Initialize using a bundle template. + +TEMPLATE_PATH optionally specifies which template to use. 
It can be one of the following: +- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) +- a local file system path with a template directory +- a Git repository URL, e.g. https://github.com/my/repository + +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates. + +Usage: + databricks bundle init [TEMPLATE_PATH] [flags] + +Flags: + --branch string Git branch to use for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. + -h, --help help for init + --output-dir string Directory to write the initialized template to. + --tag string Git tag to use for template initialization + --template-dir string Directory path within a Git repository containing the template. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-init/script b/acceptance/bundle/help/bundle-init/script new file mode 100644 index 000000000..9bcce7559 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/script @@ -0,0 +1 @@ +trace $CLI bundle init --help diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt new file mode 100644 index 000000000..8b98aa850 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle open --help +Open a resource in the browser + +Usage: + databricks bundle open [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for open + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-open/script b/acceptance/bundle/help/bundle-open/script new file mode 100644 index 000000000..b4dfa2222 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/script @@ -0,0 +1 @@ +trace $CLI bundle open --help diff --git a/acceptance/bundle/help/bundle-run/output.txt b/acceptance/bundle/help/bundle-run/output.txt new file mode 100644 index 000000000..17763a295 --- /dev/null +++ b/acceptance/bundle/help/bundle-run/output.txt @@ -0,0 +1,57 @@ + +>>> $CLI bundle run --help +Run the job or pipeline identified by KEY. + +The KEY is the unique identifier of the resource to run. In addition to +customizing the run using any of the available flags, you can also specify +keyword or positional arguments as shown in these examples: + + databricks bundle run my_job -- --key1 value1 --key2 value2 + +Or: + + databricks bundle run my_job -- value1 value2 value3 + +If the specified job uses job parameters or the job has a notebook task with +parameters, the first example applies and flag names are mapped to the +parameter names. 
+ +If the specified job does not use job parameters and the job has a Python file +task or a Python wheel task, the second example applies. + +Usage: + databricks bundle run [flags] KEY + +Job Flags: + --params stringToString comma separated k=v pairs for job parameters (default []) + +Job Task Flags: + Note: please prefer use of job-level parameters (--param) over task-level parameters. + For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task + --dbt-commands strings A list of commands to execute for jobs with DBT tasks. + --jar-params strings A list of parameters for jobs with Spark JAR tasks. + --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) + --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) + --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) + --python-params strings A list of parameters for jobs with Python tasks. + --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. + --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) + +Pipeline Flags: + --full-refresh strings List of tables to reset and recompute. + --full-refresh-all Perform a full graph reset and recompute. + --refresh strings List of tables to update. + --refresh-all Perform a full graph update. + --validate-only Perform an update to validate graph correctness. + +Flags: + -h, --help help for run + --no-wait Don't wait for the run to complete. + --restart Restart the run if it is already running. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-run/script b/acceptance/bundle/help/bundle-run/script new file mode 100644 index 000000000..edcf1786a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/script @@ -0,0 +1 @@ +trace $CLI bundle run --help diff --git a/acceptance/bundle/help/bundle-schema/output.txt b/acceptance/bundle/help/bundle-schema/output.txt new file mode 100644 index 000000000..8f2983f5b --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle schema --help +Generate JSON Schema for bundle configuration + +Usage: + databricks bundle schema [flags] + +Flags: + -h, --help help for schema + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-schema/script b/acceptance/bundle/help/bundle-schema/script new file mode 100644 index 000000000..a084fec8e --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/script @@ -0,0 +1 @@ +trace $CLI bundle schema --help diff --git a/acceptance/bundle/help/bundle-summary/output.txt b/acceptance/bundle/help/bundle-summary/output.txt new file mode 100644 index 000000000..935c4bdc5 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle summary --help +Summarize resources deployed by this bundle + +Usage: + databricks bundle summary [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for summary + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-summary/script b/acceptance/bundle/help/bundle-summary/script new file mode 100644 index 000000000..967279d86 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/script @@ -0,0 +1 @@ +trace $CLI bundle summary --help diff --git a/acceptance/bundle/help/bundle-sync/output.txt b/acceptance/bundle/help/bundle-sync/output.txt new file mode 100644 index 000000000..6588e6978 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/output.txt @@ -0,0 +1,19 @@ + +>>> $CLI bundle sync --help +Synchronize bundle tree to the workspace + +Usage: + databricks bundle sync [flags] + +Flags: + --full perform full synchronization (default is incremental) + -h, --help help for sync + --interval duration file system polling interval (for --watch) (default 1s) + --output type type of the output format + --watch watch local file system for changes + +Global Flags: + --debug enable debug logging + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-sync/script b/acceptance/bundle/help/bundle-sync/script new file mode 100644 index 000000000..fe1d6c7e3 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/script @@ -0,0 +1 @@ +trace $CLI bundle sync --help diff --git a/acceptance/bundle/help/bundle-validate/output.txt b/acceptance/bundle/help/bundle-validate/output.txt new file mode 100644 index 000000000..a0c350faf --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate --help +Validate configuration + +Usage: + databricks bundle validate [flags] + +Flags: + -h, --help help for validate + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-validate/script b/acceptance/bundle/help/bundle-validate/script new file mode 100644 index 000000000..8b8434b2d --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/script @@ -0,0 +1 @@ +trace $CLI bundle validate --help diff --git a/acceptance/bundle/help/bundle/output.txt b/acceptance/bundle/help/bundle/output.txt new file mode 100644 index 000000000..e0e2ea47c --- /dev/null +++ b/acceptance/bundle/help/bundle/output.txt @@ -0,0 +1,33 @@ + +>>> $CLI bundle --help +Databricks Asset Bundles let you express data/AI/analytics projects as code. + +Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html + +Usage: + databricks bundle [command] + +Available Commands: + deploy Deploy bundle + deployment Deployment related commands + destroy Destroy deployed bundle resources + generate Generate bundle configuration + init Initialize using a bundle template + open Open a resource in the browser + run Run a job or pipeline update + schema Generate JSON Schema for bundle configuration + summary Summarize resources deployed by this bundle + sync Synchronize bundle tree to the workspace + validate Validate configuration + +Flags: + -h, --help help for bundle + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks bundle [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle/script b/acceptance/bundle/help/bundle/script new file mode 100644 index 000000000..eac116817 --- /dev/null +++ b/acceptance/bundle/help/bundle/script @@ -0,0 +1 @@ +trace $CLI bundle --help diff --git a/acceptance/bundle/includes/non_yaml_in_include/databricks.yml b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml new file mode 100644 index 000000000..162bd6013 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: non_yaml_in_includes + +include: + - test.py + - resources/*.yml diff --git a/acceptance/bundle/includes/non_yaml_in_include/output.txt b/acceptance/bundle/includes/non_yaml_in_include/output.txt new file mode 100644 index 000000000..6006ca14e --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/output.txt @@ -0,0 +1,10 @@ +Error: Files in the 'include' configuration section must be YAML files. + in databricks.yml:5:4 + +The file test.py in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead. 
+ +Name: non_yaml_in_includes + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/includes/non_yaml_in_include/script b/acceptance/bundle/includes/non_yaml_in_include/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/includes/non_yaml_in_include/test.py b/acceptance/bundle/includes/non_yaml_in_include/test.py new file mode 100644 index 000000000..44159b395 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/test.py @@ -0,0 +1 @@ +print("Hello world") diff --git a/bundle/tests/clusters/databricks.yml b/acceptance/bundle/override/clusters/databricks.yml similarity index 92% rename from bundle/tests/clusters/databricks.yml rename to acceptance/bundle/override/clusters/databricks.yml index 1074462a6..14efceec0 100644 --- a/bundle/tests/clusters/databricks.yml +++ b/acceptance/bundle/override/clusters/databricks.yml @@ -1,9 +1,6 @@ bundle: name: clusters -workspace: - host: https://acme.cloud.databricks.com/ - resources: clusters: foo: diff --git a/acceptance/bundle/override/clusters/output.txt b/acceptance/bundle/override/clusters/output.txt new file mode 100644 index 000000000..cff30b3af --- /dev/null +++ b/acceptance/bundle/override/clusters/output.txt @@ -0,0 +1,33 @@ + +>>> $CLI bundle validate -o json -t default +{ + "autoscale": { + "max_workers": 7, + "min_workers": 2 + }, + "cluster_name": "foo", + "custom_tags": {}, + "node_type_id": "i3.xlarge", + "num_workers": 2, + "spark_conf": { + "spark.executor.memory": "2g" + }, + "spark_version": "13.3.x-scala2.12" +} + +>>> $CLI bundle validate -o json -t development +{ + "autoscale": { + "max_workers": 3, + "min_workers": 1 + }, + "cluster_name": "foo-override", + "custom_tags": {}, + "node_type_id": "m5.xlarge", + "num_workers": 3, + "spark_conf": { + "spark.executor.memory": "4g", + "spark.executor.memory2": "4g" + }, + "spark_version": "15.2.x-scala2.12" +} diff --git a/acceptance/bundle/override/clusters/script b/acceptance/bundle/override/clusters/script new file mode 100644 index 000000000..4a73dd93e --- /dev/null +++ b/acceptance/bundle/override/clusters/script @@ -0,0 +1,2 @@ +trace $CLI bundle validate -o json -t default | jq .resources.clusters.foo +trace $CLI bundle validate -o json -t development | jq .resources.clusters.foo diff --git a/bundle/tests/override_job_cluster/databricks.yml b/acceptance/bundle/override/job_cluster/databricks.yml similarity index 91% rename from bundle/tests/override_job_cluster/databricks.yml rename to acceptance/bundle/override/job_cluster/databricks.yml index a85b3b711..d6b7ede4f 100644 --- a/bundle/tests/override_job_cluster/databricks.yml +++ b/acceptance/bundle/override/job_cluster/databricks.yml @@ -1,9 +1,6 @@ bundle: name: override_job_cluster -workspace: - host: https://acme.cloud.databricks.com/ - resources: jobs: foo: diff --git a/acceptance/bundle/override/job_cluster/output.txt b/acceptance/bundle/override/job_cluster/output.txt new file mode 100644 index 000000000..ff6e8316e --- /dev/null +++ b/acceptance/bundle/override/job_cluster/output.txt @@ -0,0 +1,56 @@ + +>>> $CLI bundle validate -o json -t development +{ + "foo": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + 
"node_type_id": "i3.xlarge", + "num_workers": 1, + "spark_version": "13.3.x-scala2.12" + } + } + ], + "name": "job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {} + } +} + +>>> $CLI bundle validate -o json -t staging +{ + "foo": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + "node_type_id": "i3.2xlarge", + "num_workers": 4, + "spark_version": "13.3.x-scala2.12" + } + } + ], + "name": "job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {} + } +} diff --git a/acceptance/bundle/override/job_cluster/script b/acceptance/bundle/override/job_cluster/script new file mode 100644 index 000000000..4a26c433a --- /dev/null +++ b/acceptance/bundle/override/job_cluster/script @@ -0,0 +1,2 @@ +trace $CLI bundle validate -o json -t development | jq '.resources.jobs' +trace $CLI bundle validate -o json -t staging | jq '.resources.jobs' diff --git a/acceptance/bundle/override/job_cluster_var/databricks.yml b/acceptance/bundle/override/job_cluster_var/databricks.yml new file mode 100644 index 000000000..48e68c926 --- /dev/null +++ b/acceptance/bundle/override/job_cluster_var/databricks.yml @@ -0,0 +1,36 @@ +bundle: + name: override_job_cluster + +variables: + mykey: + default: key + +resources: + jobs: + foo: + name: job + job_clusters: + - job_cluster_key: key + new_cluster: + spark_version: 13.3.x-scala2.12 + +targets: + development: + resources: + jobs: + foo: + job_clusters: + - job_cluster_key: "${var.mykey}" + new_cluster: + node_type_id: i3.xlarge + num_workers: 1 + + staging: + resources: + jobs: + foo: + job_clusters: + - job_cluster_key: "${var.mykey}" + new_cluster: + node_type_id: i3.2xlarge + num_workers: 4 diff --git a/acceptance/bundle/override/job_cluster_var/output.txt b/acceptance/bundle/override/job_cluster_var/output.txt new file mode 100644 index 000000000..0b19e5eb2 --- /dev/null +++ b/acceptance/bundle/override/job_cluster_var/output.txt @@ -0,0 +1,74 @@ + +>>> $CLI bundle validate -o json -t development +{ + "foo": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + "node_type_id": "i3.xlarge", + "num_workers": 1, + "spark_version": "13.3.x-scala2.12" + } + } + ], + "name": "job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {} + } +} + +>>> $CLI bundle validate -t development +Name: override_job_cluster +Target: development +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/development + +Validation OK! 
+ +>>> $CLI bundle validate -o json -t staging +{ + "foo": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + "node_type_id": "i3.2xlarge", + "num_workers": 4, + "spark_version": "13.3.x-scala2.12" + } + } + ], + "name": "job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {} + } +} + +>>> $CLI bundle validate -t staging +Name: override_job_cluster +Target: staging +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging + +Validation OK! diff --git a/acceptance/bundle/override/job_cluster_var/script b/acceptance/bundle/override/job_cluster_var/script new file mode 100644 index 000000000..1cf373828 --- /dev/null +++ b/acceptance/bundle/override/job_cluster_var/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -o json -t development | jq '.resources.jobs' +trace $CLI bundle validate -t development +trace $CLI bundle validate -o json -t staging | jq '.resources.jobs' +trace $CLI bundle validate -t staging diff --git a/bundle/tests/override_job_tasks/databricks.yml b/acceptance/bundle/override/job_tasks/databricks.yml similarity index 94% rename from bundle/tests/override_job_tasks/databricks.yml rename to acceptance/bundle/override/job_tasks/databricks.yml index ddee28793..fd7edafb9 100644 --- a/bundle/tests/override_job_tasks/databricks.yml +++ b/acceptance/bundle/override/job_tasks/databricks.yml @@ -1,9 +1,6 @@ bundle: name: override_job_tasks -workspace: - host: https://acme.cloud.databricks.com/ - resources: jobs: foo: diff --git a/acceptance/bundle/override/job_tasks/out.development.stderr.txt b/acceptance/bundle/override/job_tasks/out.development.stderr.txt new file mode 100644 index 000000000..7b6fef0cc --- /dev/null +++ b/acceptance/bundle/override/job_tasks/out.development.stderr.txt @@ -0,0 +1,6 @@ + +>>> errcode $CLI bundle validate -o json -t development +Error: file ./test1.py not found + + +Exit code: 1 diff --git a/acceptance/bundle/override/job_tasks/output.txt b/acceptance/bundle/override/job_tasks/output.txt new file mode 100644 index 000000000..915351d4e --- /dev/null +++ b/acceptance/bundle/override/job_tasks/output.txt @@ -0,0 +1,77 @@ +{ + "name": "job", + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "new_cluster": { + "node_type_id": "i3.xlarge", + "num_workers": 1, + "spark_version": "13.3.x-scala2.12" + }, + "spark_python_task": { + "python_file": "./test1.py" + }, + "task_key": "key1" + }, + { + "new_cluster": { + "spark_version": "13.3.x-scala2.12" + }, + "spark_python_task": { + "python_file": "./test2.py" + }, + "task_key": "key2" + } + ] +} + +>>> errcode $CLI bundle validate -o json -t staging +Error: file ./test1.py not found + + +Exit code: 1 +{ + "name": "job", + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "new_cluster": { + "spark_version": "13.3.x-scala2.12" + }, + "spark_python_task": { + "python_file": "./test1.py" + }, + "task_key": "key1" + }, + { + "new_cluster": { + "node_type_id": "i3.2xlarge", + "num_workers": 4, + "spark_version": "13.3.x-scala2.12" + }, + "spark_python_task": { + "python_file": "./test3.py" + }, + "task_key": "key2" + } + ] +} + +>>> errcode $CLI bundle validate -t staging +Error: file ./test1.py not found + +Name: override_job_tasks +Target: staging +Workspace: + User: $USERNAME + Path: 
/Workspace/Users/$USERNAME/.bundle/override_job_tasks/staging + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/override/job_tasks/script b/acceptance/bundle/override/job_tasks/script new file mode 100644 index 000000000..f41729c1e --- /dev/null +++ b/acceptance/bundle/override/job_tasks/script @@ -0,0 +1,3 @@ +trace errcode $CLI bundle validate -o json -t development 2> out.development.stderr.txt | jq .resources.jobs.foo +trace errcode $CLI bundle validate -o json -t staging | jq .resources.jobs.foo +trace errcode $CLI bundle validate -t staging diff --git a/acceptance/bundle/override/merge-string-map/databricks.yml b/acceptance/bundle/override/merge-string-map/databricks.yml new file mode 100644 index 000000000..5e443ceca --- /dev/null +++ b/acceptance/bundle/override/merge-string-map/databricks.yml @@ -0,0 +1,13 @@ +bundle: + name: merge-string-map + +resources: + clusters: + my_cluster: "hello" + +targets: + dev: + resources: + clusters: + my_cluster: + spark_version: "25" diff --git a/acceptance/bundle/override/merge-string-map/output.txt b/acceptance/bundle/override/merge-string-map/output.txt new file mode 100644 index 000000000..b566aa07f --- /dev/null +++ b/acceptance/bundle/override/merge-string-map/output.txt @@ -0,0 +1,27 @@ + +>>> $CLI bundle validate -o json -t dev +Warning: expected map, found string + at resources.clusters.my_cluster + in databricks.yml:6:17 + +{ + "clusters": { + "my_cluster": { + "custom_tags": {}, + "spark_version": "25" + } + } +} + +>>> $CLI bundle validate -t dev +Warning: expected map, found string + at resources.clusters.my_cluster + in databricks.yml:6:17 + +Name: merge-string-map +Target: dev +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/merge-string-map/dev + +Found 1 warning diff --git a/acceptance/bundle/override/merge-string-map/script b/acceptance/bundle/override/merge-string-map/script new file mode 100644 index 000000000..a109d5f69 --- /dev/null +++ b/acceptance/bundle/override/merge-string-map/script @@ -0,0 +1,2 @@ +trace $CLI bundle validate -o json -t dev | jq .resources +trace $CLI bundle validate -t dev diff --git a/bundle/tests/override_pipeline_cluster/databricks.yml b/acceptance/bundle/override/pipeline_cluster/databricks.yml similarity index 90% rename from bundle/tests/override_pipeline_cluster/databricks.yml rename to acceptance/bundle/override/pipeline_cluster/databricks.yml index 8930f30e8..8b4857460 100644 --- a/bundle/tests/override_pipeline_cluster/databricks.yml +++ b/acceptance/bundle/override/pipeline_cluster/databricks.yml @@ -1,9 +1,6 @@ bundle: name: override_pipeline_cluster -workspace: - host: https://acme.cloud.databricks.com/ - resources: pipelines: foo: diff --git a/acceptance/bundle/override/pipeline_cluster/output.txt b/acceptance/bundle/override/pipeline_cluster/output.txt new file mode 100644 index 000000000..8babed0ec --- /dev/null +++ b/acceptance/bundle/override/pipeline_cluster/output.txt @@ -0,0 +1,44 @@ + +>>> $CLI bundle validate -o json -t development +{ + "foo": { + "clusters": [ + { + "label": "default", + "node_type_id": "i3.xlarge", + "num_workers": 1, + "spark_conf": { + "foo": "bar" + } + } + ], + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/development/state/metadata.json" + }, + "name": "job", + "permissions": [] + } +} + +>>> $CLI bundle validate -o json -t staging +{ + "foo": { + "clusters": [ + { + "label": "default", + "node_type_id": "i3.2xlarge", + "num_workers": 4, + 
"spark_conf": { + "foo": "bar" + } + } + ], + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/staging/state/metadata.json" + }, + "name": "job", + "permissions": [] + } +} diff --git a/acceptance/bundle/override/pipeline_cluster/script b/acceptance/bundle/override/pipeline_cluster/script new file mode 100644 index 000000000..b06005ce5 --- /dev/null +++ b/acceptance/bundle/override/pipeline_cluster/script @@ -0,0 +1,2 @@ +trace $CLI bundle validate -o json -t development | jq .resources.pipelines +trace $CLI bundle validate -o json -t staging | jq .resources.pipelines diff --git a/bundle/tests/path_translation/fallback/README.md b/acceptance/bundle/paths/fallback/README.md similarity index 100% rename from bundle/tests/path_translation/fallback/README.md rename to acceptance/bundle/paths/fallback/README.md diff --git a/bundle/tests/path_translation/nominal/databricks.yml b/acceptance/bundle/paths/fallback/databricks.yml similarity index 80% rename from bundle/tests/path_translation/nominal/databricks.yml rename to acceptance/bundle/paths/fallback/databricks.yml index cd425920d..c6d0abe0a 100644 --- a/bundle/tests/path_translation/nominal/databricks.yml +++ b/acceptance/bundle/paths/fallback/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_nominal + name: fallback include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/fallback/output.job.json b/acceptance/bundle/paths/fallback/output.job.json new file mode 100644 index 000000000..fe9e1cf3d --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.job.json @@ -0,0 +1,67 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + } +] diff --git a/acceptance/bundle/paths/fallback/output.pipeline.json b/acceptance/bundle/paths/fallback/output.pipeline.json new file mode 100644 index 000000000..38521cb22 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook1" + } + }, + { + "file": { + "path": 
"/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt new file mode 100644 index 000000000..63121f3d7 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate -t development -o json + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: fallback +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/fallback/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/fallback/override_job.yml b/acceptance/bundle/paths/fallback/override_job.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_job.yml rename to acceptance/bundle/paths/fallback/override_job.yml diff --git a/bundle/tests/path_translation/fallback/override_pipeline.yml b/acceptance/bundle/paths/fallback/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_pipeline.yml rename to acceptance/bundle/paths/fallback/override_pipeline.yml diff --git a/bundle/tests/path_translation/fallback/resources/my_job.yml b/acceptance/bundle/paths/fallback/resources/my_job.yml similarity index 71% rename from bundle/tests/path_translation/fallback/resources/my_job.yml rename to acceptance/bundle/paths/fallback/resources/my_job.yml index 4907df4f0..921ee412b 100644 --- a/bundle/tests/path_translation/fallback/resources/my_job.yml +++ b/acceptance/bundle/paths/fallback/resources/my_job.yml @@ -4,33 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/fallback/resources/my_pipeline.yml b/acceptance/bundle/paths/fallback/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/resources/my_pipeline.yml rename to acceptance/bundle/paths/fallback/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/fallback/script b/acceptance/bundle/paths/fallback/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/fallback/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries 
+jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/fallback/script.cleanup b/acceptance/bundle/paths/fallback/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/fallback/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep b/acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/fallback/src/file.py b/acceptance/bundle/paths/fallback/src/file.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file.py rename to acceptance/bundle/paths/fallback/src/file.py diff --git a/bundle/tests/path_translation/fallback/src/file1.py b/acceptance/bundle/paths/fallback/src/file1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file1.py rename to acceptance/bundle/paths/fallback/src/file1.py diff --git a/bundle/tests/path_translation/fallback/src/file2.py b/acceptance/bundle/paths/fallback/src/file2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file2.py rename to acceptance/bundle/paths/fallback/src/file2.py diff --git a/bundle/tests/path_translation/fallback/src/notebook.py b/acceptance/bundle/paths/fallback/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook.py rename to acceptance/bundle/paths/fallback/src/notebook.py diff --git a/bundle/tests/path_translation/fallback/src/notebook1.py b/acceptance/bundle/paths/fallback/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook1.py rename to acceptance/bundle/paths/fallback/src/notebook1.py diff --git a/bundle/tests/path_translation/fallback/src/notebook2.py b/acceptance/bundle/paths/fallback/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook2.py rename to acceptance/bundle/paths/fallback/src/notebook2.py diff --git a/bundle/tests/path_translation/fallback/src/sql.sql b/acceptance/bundle/paths/fallback/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/fallback/src/sql.sql rename to acceptance/bundle/paths/fallback/src/sql.sql diff --git a/bundle/tests/path_translation/nominal/README.md b/acceptance/bundle/paths/nominal/README.md similarity index 100% rename from bundle/tests/path_translation/nominal/README.md rename to acceptance/bundle/paths/nominal/README.md diff --git a/bundle/tests/path_translation/fallback/databricks.yml b/acceptance/bundle/paths/nominal/databricks.yml similarity index 79% rename from bundle/tests/path_translation/fallback/databricks.yml rename to acceptance/bundle/paths/nominal/databricks.yml index 92be3f921..5d3c22f91 100644 --- a/bundle/tests/path_translation/fallback/databricks.yml +++ b/acceptance/bundle/paths/nominal/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_fallback + name: nominal include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/nominal/output.job.json b/acceptance/bundle/paths/nominal/output.job.json new file mode 100644 index 000000000..9e1cb4d90 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.job.json @@ -0,0 +1,89 
@@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + }, + { + "for_each_task": { + "task": { + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + } + } + }, + "job_cluster_key": "default", + "task_key": "for_each_notebook_example" + }, + { + "for_each_task": { + "task": { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + } + } + }, + "task_key": "for_each_spark_python_example" + } +] diff --git a/acceptance/bundle/paths/nominal/output.pipeline.json b/acceptance/bundle/paths/nominal/output.pipeline.json new file mode 100644 index 000000000..277b0c4a1 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt new file mode 100644 index 000000000..1badcdec6 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate -t development -o json + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: nominal +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/nominal/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/nominal/override_job.yml b/acceptance/bundle/paths/nominal/override_job.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_job.yml rename to acceptance/bundle/paths/nominal/override_job.yml diff --git a/bundle/tests/path_translation/nominal/override_pipeline.yml b/acceptance/bundle/paths/nominal/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_pipeline.yml rename to acceptance/bundle/paths/nominal/override_pipeline.yml diff --git a/bundle/tests/path_translation/nominal/resources/my_job.yml b/acceptance/bundle/paths/nominal/resources/my_job.yml similarity index 74% rename from bundle/tests/path_translation/nominal/resources/my_job.yml rename to acceptance/bundle/paths/nominal/resources/my_job.yml index 2020c9dc8..13996a20c 100644 --- a/bundle/tests/path_translation/nominal/resources/my_job.yml +++ b/acceptance/bundle/paths/nominal/resources/my_job.yml @@ -4,38 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar - task_key: for_each_notebook_example + job_cluster_key: default for_each_task: task: notebook_task: @@ -44,5 +51,12 @@ resources: - task_key: for_each_spark_python_example for_each_task: task: + job_cluster_key: default spark_python_task: python_file: "this value is overridden" + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/nominal/resources/my_pipeline.yml b/acceptance/bundle/paths/nominal/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/resources/my_pipeline.yml rename to acceptance/bundle/paths/nominal/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/nominal/script b/acceptance/bundle/paths/nominal/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/nominal/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/nominal/script.cleanup b/acceptance/bundle/paths/nominal/script.cleanup new file mode 100644 index 
000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/nominal/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep b/acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/nominal/src/file.py b/acceptance/bundle/paths/nominal/src/file.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file.py rename to acceptance/bundle/paths/nominal/src/file.py diff --git a/bundle/tests/path_translation/nominal/src/file1.py b/acceptance/bundle/paths/nominal/src/file1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file1.py rename to acceptance/bundle/paths/nominal/src/file1.py diff --git a/bundle/tests/path_translation/nominal/src/file2.py b/acceptance/bundle/paths/nominal/src/file2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file2.py rename to acceptance/bundle/paths/nominal/src/file2.py diff --git a/bundle/tests/path_translation/nominal/src/notebook.py b/acceptance/bundle/paths/nominal/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook.py rename to acceptance/bundle/paths/nominal/src/notebook.py diff --git a/bundle/tests/path_translation/nominal/src/notebook1.py b/acceptance/bundle/paths/nominal/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook1.py rename to acceptance/bundle/paths/nominal/src/notebook1.py diff --git a/bundle/tests/path_translation/nominal/src/notebook2.py b/acceptance/bundle/paths/nominal/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook2.py rename to acceptance/bundle/paths/nominal/src/notebook2.py diff --git a/bundle/tests/path_translation/nominal/src/sql.sql b/acceptance/bundle/paths/nominal/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/nominal/src/sql.sql rename to acceptance/bundle/paths/nominal/src/sql.sql diff --git a/bundle/tests/relative_path_translation/databricks.yml b/acceptance/bundle/paths/relative_path_translation/databricks.yml similarity index 100% rename from bundle/tests/relative_path_translation/databricks.yml rename to acceptance/bundle/paths/relative_path_translation/databricks.yml diff --git a/acceptance/bundle/paths/relative_path_translation/output.default.json b/acceptance/bundle/paths/relative_path_translation/output.default.json new file mode 100644 index 000000000..e2514b392 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.default.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file1.py", + "/Workspace/remote/src/file1.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.override.json b/acceptance/bundle/paths/relative_path_translation/output.override.json new file mode 100644 index 000000000..729d2eaa0 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.override.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file2.py", + "/Workspace/remote/src/file2.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.txt b/acceptance/bundle/paths/relative_path_translation/output.txt new file mode 100644 index 000000000..362f2ec7b --- /dev/null +++ 
b/acceptance/bundle/paths/relative_path_translation/output.txt @@ -0,0 +1,4 @@ + +>>> $CLI bundle validate -t default -o json + +>>> $CLI bundle validate -t override -o json diff --git a/bundle/tests/relative_path_translation/resources/job.yml b/acceptance/bundle/paths/relative_path_translation/resources/job.yml similarity index 66% rename from bundle/tests/relative_path_translation/resources/job.yml rename to acceptance/bundle/paths/relative_path_translation/resources/job.yml index 93f121f25..9540ff1ad 100644 --- a/bundle/tests/relative_path_translation/resources/job.yml +++ b/acceptance/bundle/paths/relative_path_translation/resources/job.yml @@ -3,12 +3,20 @@ resources: job: tasks: - task_key: local + job_cluster_key: default spark_python_task: python_file: ../src/file1.py - task_key: variable_reference + job_cluster_key: default spark_python_task: # Note: this is a pure variable reference yet needs to persist the location # of the reference, not the location of the variable value. # Also see https://github.com/databricks/cli/issues/1330. python_file: ${var.file_path} + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/acceptance/bundle/paths/relative_path_translation/script b/acceptance/bundle/paths/relative_path_translation/script new file mode 100644 index 000000000..252e9a07f --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -t default -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json +trace $CLI bundle validate -t override -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json diff --git a/bundle/tests/relative_path_translation/src/file1.py b/acceptance/bundle/paths/relative_path_translation/src/file1.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file1.py rename to acceptance/bundle/paths/relative_path_translation/src/file1.py diff --git a/bundle/tests/relative_path_translation/src/file2.py b/acceptance/bundle/paths/relative_path_translation/src/file2.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file2.py rename to acceptance/bundle/paths/relative_path_translation/src/file2.py diff --git a/bundle/tests/quality_monitor/databricks.yml b/acceptance/bundle/quality_monitor/databricks.yml similarity index 100% rename from bundle/tests/quality_monitor/databricks.yml rename to acceptance/bundle/quality_monitor/databricks.yml diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt new file mode 100644 index 000000000..b3718c802 --- /dev/null +++ b/acceptance/bundle/quality_monitor/output.txt @@ -0,0 +1,73 @@ + +>>> $CLI bundle validate -o json -t development +{ + "mode": "development", + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.dev", + "schedule": null, + "table_name": "main.test.dev" + } + } +} + +>>> $CLI bundle validate -o json -t staging +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + 
"inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.staging", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.staging" + } + } +} + +>>> $CLI bundle validate -o json -t production +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day", + "1 hour" + ], + "model_id_col": "model_id_prod", + "prediction_col": "prediction_prod", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp_prod" + }, + "output_schema_name": "main.prod", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.prod" + } + } +} diff --git a/acceptance/bundle/quality_monitor/script b/acceptance/bundle/quality_monitor/script new file mode 100644 index 000000000..85a69d5e7 --- /dev/null +++ b/acceptance/bundle/quality_monitor/script @@ -0,0 +1,3 @@ +trace $CLI bundle validate -o json -t development | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t staging | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t production | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' diff --git a/acceptance/bundle/syncroot/dotdot-git/databricks.yml b/acceptance/bundle/syncroot/dotdot-git/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-git/output.txt b/acceptance/bundle/syncroot/dotdot-git/output.txt new file mode 100644 index 000000000..f1dc5fb01 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/output.txt @@ -0,0 +1,11 @@ +Error: path "$TMPDIR" is not within repository root "$TMPDIR/myrepo" + +Name: test-bundle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/syncroot/dotdot-git/script b/acceptance/bundle/syncroot/dotdot-git/script new file mode 100644 index 000000000..0706a1d5e --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/script @@ -0,0 +1,6 @@ +# This should error, we do not allow syncroot outside of git repo. +mkdir myrepo +cd myrepo +cp ../databricks.yml . +git-repo-init +$CLI bundle validate | sed 's/\\\\/\//g' diff --git a/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-nogit/output.txt b/acceptance/bundle/syncroot/dotdot-nogit/output.txt new file mode 100644 index 000000000..46f617f35 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/output.txt @@ -0,0 +1,7 @@ +Name: test-bundle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default + +Validation OK! 
diff --git a/acceptance/bundle/syncroot/dotdot-nogit/script b/acceptance/bundle/syncroot/dotdot-nogit/script new file mode 100644 index 000000000..d3388903e --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/script @@ -0,0 +1,2 @@ +# This should not error, syncroot can be outside bundle root. +$CLI bundle validate diff --git a/acceptance/bundle/templates/dbt-sql/input.json b/acceptance/bundle/templates/dbt-sql/input.json new file mode 100644 index 000000000..201ac9667 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_dbt_sql", + "http_path": "/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/dbt-sql/output.txt b/acceptance/bundle/templates/dbt-sql/output.txt new file mode 100644 index 000000000..972c7e152 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +Welcome to the dbt template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +📊 Your new project has been created in the 'my_dbt_sql' directory! +If you already have dbt installed, just type 'cd my_dbt_sql; dbt init' to get started. +Refer to the README.md file for full "getting started" guide and production setup instructions. + + +>>> $CLI bundle validate -t dev +Name: my_dbt_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_dbt_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod + +Validation OK! 
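The dbt-sql template test drives `bundle init` non-interactively via the input.json shown above. The commands below mirror the recorded fixture output; the `cd` into the generated project is an assumption about where the validate calls run:

```
# Non-interactive template initialization from the recorded answers file
databricks bundle init dbt-sql --config-file ./input.json --output-dir output

# Validate both deployment targets of the generated project
cd output/my_dbt_sql
databricks bundle validate -t dev
databricks bundle validate -t prod
```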
diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json new file mode 100644 index 000000000..28fe943fd --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "redhat.vscode-yaml", + "innoverio.vscode-dbt-power-user", + ] +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json new file mode 100644 index 000000000..e8dcd1a83 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json @@ -0,0 +1,32 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md new file mode 100644 index 000000000..756a2eda4 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -0,0 +1,138 @@ +# my_dbt_sql + +The 'my_dbt_sql' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). 
+ +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. + + Run these instructions from the `my_dbt_sql` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To setup up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found? You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. + +## Manually deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. + +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project. 
+For example, the default template would deploy a job called +`[dev yourname] my_dbt_sql_job` to your workspace. +You can find that job by opening your workpace and clicking on **Workflows**. + +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml. +When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml +has a schedule set that runs every day. The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml new file mode 100644 index 000000000..1962bc543 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -0,0 +1,34 @@ +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_dbt_sql + uuid: + +include: + - resources/*.yml + +# Deployment targets. +# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml +targets: + dev: + default: true + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml new file mode 100644 index 000000000..fdaf30dda --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml @@ -0,0 +1,38 @@ + +# This file defines dbt profiles for deployed dbt jobs. +my_dbt_sql: + target: dev # default target + outputs: + + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B. 
when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: + type: databricks + method: http + catalog: main + schema: "{{ var('dev_schema') }}" + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'dev' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" + + # The production target when deployed with the Databricks CLI + prod: + type: databricks + method: http + catalog: main + schema: default + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'prod' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml new file mode 100644 index 000000000..4218640d8 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml @@ -0,0 +1,32 @@ +name: 'my_dbt_sql' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'my_dbt_sql' + +# These configurations specify where dbt should look for different types of files. +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. +model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{ config(...) }}` macro. +models: + my_dbt_sql: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml new file mode 100644 index 000000000..5e0f0fc29 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml @@ -0,0 +1,23 @@ +# This file defines prompts with defaults for dbt initializaton. +# It is used when the `dbt init` command is invoked. +# +fixed: + type: databricks +prompts: + host: + default: $DATABRICKS_HOST + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + http_path: + hint: 'HTTP path of SQL warehouse to use' + default: /sql/2.0/warehouses/f00dcafe + catalog: + hint: 'initial catalog' + default: main + schema: + hint: 'personal schema where dbt will build objects during development, example: $USERNAME' + threads: + hint: 'threads to use during development, 1 or more' + type: 'int' + default: 4 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt new file mode 100644 index 000000000..e6b861203 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt @@ -0,0 +1,3 @@ +## requirements-dev.txt: dependencies for local development. 
+ +dbt-databricks>=1.8.0,<2.0.0 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml new file mode 100644 index 000000000..d52f8ed50 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml @@ -0,0 +1,43 @@ +resources: + jobs: + my_dbt_sql_job: + name: my_dbt_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + + tasks: + - task_key: dbt + + dbt_task: + project_directory: ../ + # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ + commands: + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql new file mode 100644 index 000000000..e32736ceb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql @@ -0,0 +1,17 @@ + +-- This model file defines a materialized view called 'orders_daily' +-- +-- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. 
+{{ config(materialized = 'materialized_view') }} + +select order_date, count(*) AS number_of_orders + +from {{ ref('orders_raw') }} + +-- During development, only process a smaller range of data +{% if target.name != 'prod' %} +where order_date >= '2019-08-01' and order_date < '2019-09-01' +{% endif %} + +group by order_date diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql new file mode 100644 index 000000000..8faf8f38b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql @@ -0,0 +1,16 @@ +-- This model file defines a streaming table called 'orders_raw' +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml new file mode 100644 index 000000000..c64f1bfce --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: orders_raw + description: "Raw ingested orders" + columns: + - name: customer_name + description: "The name of a customer" + data_tests: + - unique + - not_null + + - name: orders_daily + description: "Number of orders by day" + columns: + - name: order_date + description: "The date on which orders took place" + data_tests: + - unique + - not_null diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script new file mode 100644 index 000000000..c4ca817fe --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +cd output/my_dbt_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-python/input.json b/acceptance/bundle/templates/default-python/input.json new file mode 100644 index 000000000..3e1d79c68 --- /dev/null +++ b/acceptance/bundle/templates/default-python/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_python", + 
"include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/default-python/output.txt b/acceptance/bundle/templates/default-python/output.txt new file mode 100644 index 000000000..5493ac2cf --- /dev/null +++ b/acceptance/bundle/templates/default-python/output.txt @@ -0,0 +1,30 @@ + +>>> $CLI bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/README.md new file mode 100644 index 000000000..97d7d7949 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/README.md @@ -0,0 +1,47 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +5. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +7. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml new file mode 100644 index 000000000..9deca9cf5 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml @@ -0,0 +1,31 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. 
+ root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..e6148a4ad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,49 @@ +# The main job for my_default_python. 
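+# It chains three tasks: a notebook task, a refresh of the project's DLT pipeline,
+# and a Python wheel task that calls the package's "main" entry point.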
+resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main + libraries: + # By default we just include the .whl file generated for the my_default_python package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..f9e083f4f --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,13 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + catalog: main + target: my_default_python_${bundle.target} + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
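The `main_task` above packages this project as a wheel and invokes the `main` entry point that `setup.py` (further down in this diff) registers as `main=my_default_python.main:main`. For local iteration the same code path can be exercised directly; the sketch below is not part of the template and assumes the package is installed into the active environment (for example with `pip install -e .`) and that Databricks Connect is configured:

```
# Hypothetical local smoke test: call the same function the job's
# python_wheel_task resolves through the wheel's "main" entry point.
from my_default_python.main import main

if __name__ == "__main__":
    main()  # shows the first 5 rows of samples.nyctaxi.trips
```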
diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..3b2fef4b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py new file mode 100644 index 000000000..84b24ecb8 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="$USERNAME", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..36e993af7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. 
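+# If databricks-connect is not installed, fall back to a regular SparkSession;
+# on a Databricks cluster this simply returns the session that is already running.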
+def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..0d560443b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/script b/acceptance/bundle/templates/default-python/script new file mode 100644 index 000000000..b11a7ea21 --- /dev/null +++ b/acceptance/bundle/templates/default-python/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-sql/.ruff.toml b/acceptance/bundle/templates/default-sql/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/acceptance/bundle/templates/default-sql/input.json b/acceptance/bundle/templates/default-sql/input.json new file mode 100644 index 000000000..c728d25de --- /dev/null +++ b/acceptance/bundle/templates/default-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_sql", + "http_path": 
"/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, automatically use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/default-sql/output.txt b/acceptance/bundle/templates/default-sql/output.txt new file mode 100644 index 000000000..fe0139093 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init default-sql --config-file ./input.json --output-dir output + +Welcome to the default SQL template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_sql' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_default_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json new file mode 100644 index 000000000..8e1023465 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json new file mode 100644 index 000000000..c641abe39 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "main", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md new file mode 100644 index 000000000..67ded153f --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -0,0 +1,41 @@ +# my_default_sql + +The 'my_default_sql' project was generated by using the default-sql template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html + +2. Authenticate to your Databricks workspace (if you have not done so already): + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_sql_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. To run a job, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + +7. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml new file mode 100644 index 000000000..ab857287e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_default_sql. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_sql + uuid: + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ mode: development + default: true + workspace: + host: $DATABRICKS_URL + variables: + warehouse_id: f00dcafe + catalog: main + schema: ${workspace.current_user.short_name} + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + variables: + warehouse_id: f00dcafe + catalog: main + schema: default + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml new file mode 100644 index 000000000..86de0f9db --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml @@ -0,0 +1,38 @@ +# A job running SQL queries on a SQL warehouse +resources: + jobs: + my_default_sql_sql_job: + name: my_default_sql_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + - name: bundle_target + default: ${bundle.target} + + tasks: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_raw.sql + + - task_key: orders_daily + depends_on: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_daily.sql diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md new file mode 100644 index 000000000..5350d09cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks and SQL files. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
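The job above passes `catalog`, `schema`, and `bundle_target` as job-level parameters, and the SQL files under `src/` (further down in this diff) reference them as `{{catalog}}`, `{{schema}}`, and `{{bundle_target}}`. To inspect the resulting tables outside of Workflows, a warehouse client such as `databricks-sql-connector` can be pointed at the same HTTP path. This is a hedged sketch, not part of the template: the host, token, and the `f00dcafe` warehouse path are placeholders, and `orders_daily` only exists after the deployed job has run at least once.

```
# Query the tables produced by the deployed SQL job over the same warehouse.
from databricks import sql  # pip install databricks-sql-connector

with sql.connect(
    server_hostname="<workspace-host>",        # placeholder
    http_path="/sql/2.0/warehouses/f00dcafe",  # placeholder warehouse path
    access_token="<personal-access-token>",    # placeholder
) as connection:
    with connection.cursor() as cursor:
        # Mirror what the sql_task files do before querying: pick catalog and schema.
        cursor.execute("USE CATALOG main")
        cursor.execute("USE SCHEMA default")
        cursor.execute("SELECT order_date, number_of_orders FROM orders_daily LIMIT 10")
        for row in cursor.fetchall():
            print(row)
```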
diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb new file mode 100644 index 000000000..c3fd072e5 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "exploration", + "widgets": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql new file mode 100644 index 000000000..ea7b80b54 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -0,0 +1,21 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REPLACE MATERIALIZED VIEW + orders_daily +AS SELECT + order_date, count(*) AS number_of_orders +FROM + orders_raw + +WHERE if( + {{bundle_target}} = "prod", + true, + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01' +) + +GROUP BY order_date diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql new file mode 100644 index 000000000..79b1354cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -0,0 +1,19 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REFRESH STREAMING TABLE + orders_raw +AS SELECT + customer_name, + DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date, + order_number +FROM STREAM READ_FILES( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script new file mode 100644 index 000000000..66e7a14a2 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-sql --config-file ./input.json --output-dir output + +cd output/my_default_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json new file mode 100644 index 000000000..748076c75 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -0,0 +1,5 @@ +{ + "project_name": 
"my_jobs_as_code", + "include_notebook": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt new file mode 100644 index 000000000..1aa8a94d5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -0,0 +1,85 @@ + +>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_jobs_as_code' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev --output json +{ + "jobs": { + "my_jobs_as_code_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "email_notifications": { + "on_failure": [ + "$USERNAME" + ] + }, + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 4, + "min_workers": 1 + }, + "node_type_id": "i3.xlarge", + "spark_version": "15.4.x-scala2.12" + } + } + ], + "max_concurrent_runs": 4, + "name": "[dev $USERNAME] my_jobs_as_code_job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": { + "dev": "$USERNAME" + }, + "tasks": [ + { + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "depends_on": [ + { + "task_key": "notebook_task" + } + ], + "job_cluster_key": "job_cluster", + "libraries": [ + { + "whl": "dist/*.whl" + } + ], + "python_wheel_task": { + "entry_point": "main", + "package_name": "my_jobs_as_code" + }, + "task_key": "main_task" + } + ], + "trigger": { + "pause_status": "PAUSED", + "periodic": { + "interval": 1, + "unit": "DAYS" + } + } + } + } +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md new file mode 100644 index 000000000..8c429c6e5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -0,0 +1,58 @@ +# my_jobs_as_code + +The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. 
Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_jobs_as_code_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_jobs_as_code_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml new file mode 100644 index 000000000..fd87aa381 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_jobs_as_code. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_jobs_as_code + uuid: + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. 
+ root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml new file mode 100644 index 000000000..28240e3ec --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_jobs_as_code" +requires-python = ">=3.10" +description = "wheel file based on my_jobs_as_code" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +[project.entry-points.packages] +main = "my_jobs_as_code.main:main" + +[tool.setuptools] +py-modules = ["resources", "my_jobs_as_code"] + +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. 
+ # "databricks-connect>=15.4,<15.5", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py new file mode 100644 index 000000000..4854d656f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py @@ -0,0 +1,67 @@ +from databricks.bundles.jobs import Job + +""" +The main job for my_jobs_as_code. +""" + + +my_jobs_as_code_job = Job.from_dict( + { + "name": "my_jobs_as_code_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + "email_notifications": { + "on_failure": [ + "$USERNAME", + ], + }, + "tasks": [ + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + { + "task_key": "main_task", + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "my_jobs_as_code", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the my_jobs_as_code package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "15.4.x-scala2.12", + "node_type_id": "i3.xlarge", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py new file mode 100644 index 000000000..ba284ba82 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_jobs_as_code project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb new file mode 100644 index 000000000..9bc3f1560 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code_job.py."
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_jobs_as_code import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py new file mode 100644 index 000000000..13e100ee2 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py @@ -0,0 +1,8 @@ +from my_jobs_as_code.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script new file mode 100644 index 000000000..af28b9d0a --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -0,0 +1,14 @@ +trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +cd output/my_jobs_as_code + +# silence uv output because it's non-deterministic +uv sync 2> /dev/null + +# remove version constraint because it always creates a warning on dev builds +cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new +mv databricks.yml.new databricks.yml + +trace $CLI bundle validate -t dev --output json | jq ".resources" + +rm -fr .venv resources/__pycache__ uv.lock my_jobs_as_code.egg-info diff --git a/bundle/tests/undefined_resources/databricks.yml b/acceptance/bundle/undefined_resources/databricks.yml similarity index 100% rename from bundle/tests/undefined_resources/databricks.yml rename to acceptance/bundle/undefined_resources/databricks.yml diff --git a/acceptance/bundle/undefined_resources/output.txt b/acceptance/bundle/undefined_resources/output.txt new file mode 100644 index 000000000..29b51bc1a --- /dev/null +++ b/acceptance/bundle/undefined_resources/output.txt @@ -0,0 +1,19 @@ +Error: experiment undefined-experiment is not defined + at resources.experiments.undefined-experiment + in databricks.yml:11:26 + +Error: job undefined-job is not defined + at resources.jobs.undefined-job + in databricks.yml:6:19 + +Error: pipeline undefined-pipeline is not defined + at resources.pipelines.undefined-pipeline + in databricks.yml:14:24 + +Found 3 errors + +Name: undefined-job +Target: default + + +Exit code: 1 diff --git a/acceptance/bundle/undefined_resources/script b/acceptance/bundle/undefined_resources/script new file mode 100644 index 000000000..10a3c485a --- /dev/null +++ 
b/acceptance/bundle/undefined_resources/script @@ -0,0 +1,2 @@ +# We need sort_blocks.py because the order of diagnostics is currently randomized +$CLI bundle validate 2>&1 | sort_blocks.py diff --git a/acceptance/bundle/variables/arg-repeat/databricks.yml b/acceptance/bundle/variables/arg-repeat/databricks.yml new file mode 100644 index 000000000..377c6cfab --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: arg-repeat + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/arg-repeat/output.txt b/acceptance/bundle/variables/arg-repeat/output.txt new file mode 100644 index 000000000..2f9de1a3c --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/output.txt @@ -0,0 +1,18 @@ + +>>> errcode $CLI bundle validate --var a=one -o json +{ + "a": { + "default": "hello", + "value": "one" + } +} + +>>> errcode $CLI bundle validate --var a=one --var a=two +Error: failed to assign two to a: variable has already been assigned value: one + +Name: arg-repeat +Target: default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/arg-repeat/script b/acceptance/bundle/variables/arg-repeat/script new file mode 100644 index 000000000..3e03dbcb1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate --var a=one -o json | jq .variables +trace errcode $CLI bundle validate --var a=one --var a=two diff --git a/acceptance/bundle/variables/complex-cross-ref/databricks.yml b/acceptance/bundle/variables/complex-cross-ref/databricks.yml new file mode 100644 index 000000000..4459f44df --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: complex-cross-ref + +variables: + a: + default: + a_1: 500 + a_2: ${var.b.b_2} + b: + default: + b_1: ${var.a.a_1} + b_2: 2.5 diff --git a/acceptance/bundle/variables/complex-cross-ref/output.txt b/acceptance/bundle/variables/complex-cross-ref/output.txt new file mode 100644 index 000000000..f1b624d29 --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/output.txt @@ -0,0 +1,22 @@ +{ + "a": { + "default": { + "a_1": 500, + "a_2": 2.5 + }, + "value": { + "a_1": 500, + "a_2": 2.5 + } + }, + "b": { + "default": { + "b_1": 500, + "b_2": 2.5 + }, + "value": { + "b_1": 500, + "b_2": 2.5 + } + } +} diff --git a/acceptance/bundle/variables/complex-cross-ref/script b/acceptance/bundle/variables/complex-cross-ref/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/complex-cycle-self/databricks.yml b/acceptance/bundle/variables/complex-cycle-self/databricks.yml new file mode 100644 index 000000000..bb461795c --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle-self/output.txt b/acceptance/bundle/variables/complex-cycle-self/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git 
a/acceptance/bundle/variables/complex-cycle-self/script b/acceptance/bundle/variables/complex-cycle-self/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-cycle/databricks.yml b/acceptance/bundle/variables/complex-cycle/databricks.yml new file mode 100644 index 000000000..9784a4e25 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.b} + b: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle/output.txt b/acceptance/bundle/variables/complex-cycle/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle/script b/acceptance/bundle/variables/complex-cycle/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-simple/databricks.yml b/acceptance/bundle/variables/complex-simple/databricks.yml new file mode 100644 index 000000000..135ff86cf --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/databricks.yml @@ -0,0 +1,27 @@ +# This example works and properly merges resources.jobs.job1.job_clusters.new_cluster and ${var.cluster}. +# retaining num_workers, spark_version and overriding node_type_id. 
+bundle: + name: TestResolveComplexVariable + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + +resources: + jobs: + job1: + job_clusters: + - new_cluster: + node_type_id: "random" + spark_version: 13.3.x-scala2.12 + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-simple/output.txt b/acceptance/bundle/variables/complex-simple/output.txt new file mode 100644 index 000000000..16b0ec80f --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/output.txt @@ -0,0 +1,10 @@ +[ + { + "job_cluster_key": "", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_version": "13.3.x-scala2.12" + } + } +] diff --git a/acceptance/bundle/variables/complex-simple/script b/acceptance/bundle/variables/complex-simple/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex-transitive-deep/databricks.yml b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml new file mode 100644 index 000000000..1357c291a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: complex-transitive + +variables: + catalog: + default: hive_metastore + spark_conf_1: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog} + spark_conf: + default: ${var.spark_conf_1} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deep/output.txt b/acceptance/bundle/variables/complex-transitive-deep/output.txt new file mode 100644 index 000000000..29c41cda5 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/output.txt @@ -0,0 +1,3 @@ +{ + "spark.databricks.sql.initial.catalog.name": "hive_metastore" +} diff --git a/acceptance/bundle/variables/complex-transitive-deep/script b/acceptance/bundle/variables/complex-transitive-deep/script new file mode 100644 index 000000000..52bb08ed4 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/script @@ -0,0 +1,2 @@ +# Currently, this incorrectly outputs variable reference instead of resolved value +$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf' diff --git a/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml new file mode 100644 index 000000000..3f9bea464 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml @@ -0,0 +1,22 @@ +bundle: + name: complex-transitive-deeper + +variables: + catalog_1: + default: + name: hive_metastore + catalog: + default: ${var.catalog_1} + spark_conf: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog.name} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deeper/output.txt b/acceptance/bundle/variables/complex-transitive-deeper/output.txt new file mode 100644 index 
000000000..3bedbfb9a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/output.txt @@ -0,0 +1,7 @@ +Error: expected a map to index "variables.catalog.value.name", found string + +{ + "my_cluster": "${var.etl_cluster_config}" +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/complex-transitive-deeper/script b/acceptance/bundle/variables/complex-transitive-deeper/script new file mode 100644 index 000000000..d4fb404b1 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/script @@ -0,0 +1,2 @@ +# Currently, this errors instead of interpolating variables +$CLI bundle validate -o json | jq '.resources.clusters' diff --git a/acceptance/bundle/variables/complex-transitive/databricks.yml b/acceptance/bundle/variables/complex-transitive/databricks.yml new file mode 100644 index 000000000..9ef4e6386 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: complex-transitive + +variables: + catalog: + default: hive_metastore + spark_conf: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive/output.txt b/acceptance/bundle/variables/complex-transitive/output.txt new file mode 100644 index 000000000..29c41cda5 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive/output.txt @@ -0,0 +1,3 @@ +{ + "spark.databricks.sql.initial.catalog.name": "hive_metastore" +} diff --git a/acceptance/bundle/variables/complex-transitive/script b/acceptance/bundle/variables/complex-transitive/script new file mode 100644 index 000000000..52bb08ed4 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive/script @@ -0,0 +1,2 @@ +# Currently, this incorrectly outputs variable reference instead of resolved value +$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf' diff --git a/acceptance/bundle/variables/complex-with-var-reference/databricks.yml b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml new file mode 100644 index 000000000..104f9a470 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: TestResolveComplexVariableWithVarReference + +variables: + package_version: + default: "1.0.0" + cluster_libraries: + type: "complex" + default: + - pypi: + package: "cicd_template==${var.package_version}" + +resources: + jobs: + job1: + tasks: + - libraries: ${var.cluster_libraries} diff --git a/acceptance/bundle/variables/complex-with-var-reference/output.txt b/acceptance/bundle/variables/complex-with-var-reference/output.txt new file mode 100644 index 000000000..a5b792ac4 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/output.txt @@ -0,0 +1,12 @@ +[ + { + "libraries": [ + { + "pypi": { + "package": "cicd_template==1.0.0" + } + } + ], + "task_key": "" + } +] diff --git a/acceptance/bundle/variables/complex-with-var-reference/script b/acceptance/bundle/variables/complex-with-var-reference/script new file mode 100644 index 000000000..0f7353ad1 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.tasks diff --git a/acceptance/bundle/variables/complex-within-complex/databricks.yml 
b/acceptance/bundle/variables/complex-within-complex/databricks.yml new file mode 100644 index 000000000..f1d77289e --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/databricks.yml @@ -0,0 +1,34 @@ +# Does not work currently, explicitly disabled, even though it works if you remove 'type: "complex"' lines +# Also fails to merge clusters. +bundle: + name: TestResolveComplexVariableReferencesWithComplexVariablesError + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + spark_conf: "${var.spark_conf}" + spark_conf: + type: "complex" + value: + spark.executor.memory: "4g" + spark.executor.cores: "2" + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: + node_type_id: "random" + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-within-complex/output.txt b/acceptance/bundle/variables/complex-within-complex/output.txt new file mode 100644 index 000000000..72e6ef69a --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/output.txt @@ -0,0 +1,17 @@ +Warning: unknown field: node_type_id + at resources.jobs.job1.job_clusters[0] + in databricks.yml:25:11 + +[ + { + "job_cluster_key": "my_cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_conf": { + "spark.executor.cores": "2", + "spark.executor.memory": "4g" + } + } + } +] diff --git a/acceptance/bundle/variables/complex-within-complex/script b/acceptance/bundle/variables/complex-within-complex/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/bundle/tests/variables/complex/databricks.yml b/acceptance/bundle/variables/complex/databricks.yml similarity index 82% rename from bundle/tests/variables/complex/databricks.yml rename to acceptance/bundle/variables/complex/databricks.yml index 3b32a7c8e..5dcc30b08 100644 --- a/bundle/tests/variables/complex/databricks.yml +++ b/acceptance/bundle/variables/complex/databricks.yml @@ -11,6 +11,7 @@ resources: - task_key: test job_cluster_key: key libraries: ${variables.libraries.value} + # specific fields of complex variable are referenced: task_key: "task with spark version ${var.cluster.spark_version} and jar ${var.libraries[0].jar}" variables: @@ -35,30 +36,21 @@ variables: - jar: "/path/to/jar" - egg: "/path/to/egg" - whl: "/path/to/whl" - complexvar: - type: complex - description: "A complex variable" - default: - key1: "value1" - key2: "value2" - key3: "value3" - targets: default: + default: true dev: variables: node_type: "Standard_DS3_v3" cluster: + # complex variables are not merged, so missing variables (policy_id) are not inherited spark_version: "14.2.x-scala2.11" node_type_id: ${var.node_type} num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false - complexvar: - type: complex - default: - key1: "1" - key2: "2" - key3: "3" + libraries: + - jar: "/newpath/to/jar" + - whl: "/newpath/to/whl" diff --git a/acceptance/bundle/variables/complex/out.default.json b/acceptance/bundle/variables/complex/out.default.json new file mode 100644 index 000000000..a1ccd52bc --- /dev/null +++ b/acceptance/bundle/variables/complex/out.default.json @@ -0,0 +1,110 @@ +{ + "resources": { + "jobs": { + 
"my_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "policy_id": "some-policy-id", + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.random": "true", + "spark.speculation": "true" + }, + "spark_version": "13.2.x-scala2.11" + } + } + ], + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "job_cluster_key": "key", + "libraries": [ + { + "jar": "/path/to/jar" + }, + { + "egg": "/path/to/egg" + }, + { + "whl": "/path/to/whl" + } + ], + "task_key": "task with spark version 13.2.x-scala2.11 and jar /path/to/jar" + } + ] + } + } + }, + "variables": { + "cluster": { + "default": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "policy_id": "some-policy-id", + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.random": true, + "spark.speculation": true + }, + "spark_version": "13.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "policy_id": "some-policy-id", + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.random": true, + "spark.speculation": true + }, + "spark_version": "13.2.x-scala2.11" + } + }, + "libraries": { + "default": [ + { + "jar": "/path/to/jar" + }, + { + "egg": "/path/to/egg" + }, + { + "whl": "/path/to/whl" + } + ], + "description": "A libraries definition", + "type": "complex", + "value": [ + { + "jar": "/path/to/jar" + }, + { + "egg": "/path/to/egg" + }, + { + "whl": "/path/to/whl" + } + ] + }, + "node_type": { + "default": "Standard_DS3_v2", + "value": "Standard_DS3_v2" + } + } +} diff --git a/acceptance/bundle/variables/complex/out.dev.json b/acceptance/bundle/variables/complex/out.dev.json new file mode 100644 index 000000000..bb939091b --- /dev/null +++ b/acceptance/bundle/variables/complex/out.dev.json @@ -0,0 +1,95 @@ +{ + "resources": { + "jobs": { + "my_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key", + "new_cluster": { + "node_type_id": "Standard_DS3_v3", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.speculation": "false" + }, + "spark_version": "14.2.x-scala2.11" + } + } + ], + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "job_cluster_key": "key", + "libraries": [ + { + "jar": "/newpath/to/jar" + }, + { + "whl": "/newpath/to/whl" + } + ], + "task_key": "task with spark version 14.2.x-scala2.11 and jar /newpath/to/jar" + } + ] + } + } + }, + "variables": { + "cluster": { + "default": { + "node_type_id": "Standard_DS3_v3", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v3", + "num_workers": 4, + "spark_conf": { + 
"spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + } + }, + "libraries": { + "default": [ + { + "jar": "/newpath/to/jar" + }, + { + "whl": "/newpath/to/whl" + } + ], + "description": "A libraries definition", + "type": "complex", + "value": [ + { + "jar": "/newpath/to/jar" + }, + { + "whl": "/newpath/to/whl" + } + ] + }, + "node_type": { + "default": "Standard_DS3_v3", + "value": "Standard_DS3_v3" + } + } +} diff --git a/acceptance/bundle/variables/complex/output.txt b/acceptance/bundle/variables/complex/output.txt new file mode 100644 index 000000000..ce295421f --- /dev/null +++ b/acceptance/bundle/variables/complex/output.txt @@ -0,0 +1,14 @@ + +>>> $CLI bundle validate -o json + +>>> jq .resources.jobs.my_job.tasks[0].task_key out.default.json +"task with spark version 13.2.x-scala2.11 and jar /path/to/jar" + +>>> $CLI bundle validate -o json -t dev + +>>> jq .resources.jobs.my_job.tasks[0].task_key out.dev.json +"task with spark version 14.2.x-scala2.11 and jar /newpath/to/jar" +policy_id and spark_conf.spark_random fields do not exist in dev target: + +>>> jq .resources.jobs.my_job.job_clusters[0].new_cluster.policy_id out.dev.json +null diff --git a/acceptance/bundle/variables/complex/script b/acceptance/bundle/variables/complex/script new file mode 100644 index 000000000..f8b61f18d --- /dev/null +++ b/acceptance/bundle/variables/complex/script @@ -0,0 +1,8 @@ +trace $CLI bundle validate -o json | jq '{resources,variables}' > out.default.json +trace jq .resources.jobs.my_job.tasks[0].task_key out.default.json | grep "task with spark version 13.2.x-scala2.11 and jar /path/to/jar" + +trace $CLI bundle validate -o json -t dev | jq '{resources,variables}' > out.dev.json +trace jq .resources.jobs.my_job.tasks[0].task_key out.dev.json | grep "task with spark version 14.2.x-scala2.11 and jar /newpath/to/jar" + +echo policy_id and spark_conf.spark_random fields do not exist in dev target: +trace jq .resources.jobs.my_job.job_clusters[0].new_cluster.policy_id out.dev.json | grep null diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/acceptance/bundle/variables/complex_multiple_files/databricks.yml similarity index 100% rename from bundle/tests/variables/complex_multiple_files/databricks.yml rename to acceptance/bundle/variables/complex_multiple_files/databricks.yml diff --git a/acceptance/bundle/variables/complex_multiple_files/output.txt b/acceptance/bundle/variables/complex_multiple_files/output.txt new file mode 100644 index 000000000..ec2cad1ce --- /dev/null +++ b/acceptance/bundle/variables/complex_multiple_files/output.txt @@ -0,0 +1,159 @@ +{ + "resources": { + "jobs": { + "my_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables-multiple-files/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "key1", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.speculation": "false" + }, + "spark_version": "14.2.x-scala2.11" + } + }, + { + "job_cluster_key": "key2", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.speculation": "false" + }, + "spark_version": "14.2.x-scala2.11" + } + }, + { + 
"job_cluster_key": "key3", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.speculation": "false" + }, + "spark_version": "14.2.x-scala2.11" + } + }, + { + "job_cluster_key": "key4", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": "false", + "spark.speculation": "false" + }, + "spark_version": "14.2.x-scala2.11" + } + } + ], + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {} + } + } + }, + "variables": { + "cluster1": { + "default": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + } + }, + "cluster2": { + "default": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + } + }, + "cluster3": { + "default": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + } + }, + "cluster4": { + "default": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + }, + "description": "A cluster definition", + "type": "complex", + "value": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 4, + "spark_conf": { + "spark.databricks.delta.retentionDurationCheck.enabled": false, + "spark.speculation": false + }, + "spark_version": "14.2.x-scala2.11" + } + } + } +} diff --git a/acceptance/bundle/variables/complex_multiple_files/script b/acceptance/bundle/variables/complex_multiple_files/script new file mode 100644 index 000000000..24f1d58d5 --- /dev/null +++ b/acceptance/bundle/variables/complex_multiple_files/script @@ -0,0 +1 @@ +$CLI bundle validate -t dev -o json | jq '{resources, variables}' diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/acceptance/bundle/variables/complex_multiple_files/variables/clusters.yml similarity index 100% rename from bundle/tests/variables/complex_multiple_files/variables/clusters.yml rename to acceptance/bundle/variables/complex_multiple_files/variables/clusters.yml diff --git 
a/acceptance/bundle/variables/cycle/databricks.yml b/acceptance/bundle/variables/cycle/databricks.yml new file mode 100644 index 000000000..b35196671 --- /dev/null +++ b/acceptance/bundle/variables/cycle/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: cycle + +variables: + a: + default: ${var.b} + b: + default: ${var.a} diff --git a/acceptance/bundle/variables/cycle/output.txt b/acceptance/bundle/variables/cycle/output.txt new file mode 100644 index 000000000..ea9c95cd4 --- /dev/null +++ b/acceptance/bundle/variables/cycle/output.txt @@ -0,0 +1,14 @@ +Error: cycle detected in field resolution: variables.a.default -> var.b -> var.a -> var.b + +{ + "a": { + "default": "${var.b}", + "value": "${var.b}" + }, + "b": { + "default": "${var.a}", + "value": "${var.a}" + } +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/cycle/script b/acceptance/bundle/variables/cycle/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/cycle/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/bundle/tests/variables/empty/databricks.yml b/acceptance/bundle/variables/empty/databricks.yml similarity index 100% rename from bundle/tests/variables/empty/databricks.yml rename to acceptance/bundle/variables/empty/databricks.yml diff --git a/acceptance/bundle/variables/empty/output.txt b/acceptance/bundle/variables/empty/output.txt new file mode 100644 index 000000000..8933443df --- /dev/null +++ b/acceptance/bundle/variables/empty/output.txt @@ -0,0 +1,11 @@ +Error: no value assigned to required variable a. Assignment can be done using "--var", by setting the BUNDLE_VAR_a environment variable, or in .databricks/bundle//variable-overrides.json file + +Name: empty${var.a} +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/empty${var.a}/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/empty/script b/acceptance/bundle/variables/empty/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/empty/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/bundle/tests/variables/env_overrides/databricks.yml b/acceptance/bundle/variables/env_overrides/databricks.yml similarity index 100% rename from bundle/tests/variables/env_overrides/databricks.yml rename to acceptance/bundle/variables/env_overrides/databricks.yml diff --git a/acceptance/bundle/variables/env_overrides/output.txt b/acceptance/bundle/variables/env_overrides/output.txt new file mode 100644 index 000000000..1ee9ef625 --- /dev/null +++ b/acceptance/bundle/variables/env_overrides/output.txt @@ -0,0 +1,40 @@ + +>>> $CLI bundle validate -t env-with-single-variable-override -o json +"default-a dev-b" + +>>> $CLI bundle validate -t env-with-two-variable-overrides -o json +"prod-a prod-b" + +>>> BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json +"prod-a env-var-b" + +>>> errcode $CLI bundle validate -t env-missing-a-required-variable-assignment +Error: no value assigned to required variable b. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file + +Name: test bundle +Target: env-missing-a-required-variable-assignment +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test bundle/env-missing-a-required-variable-assignment + +Found 1 error + +Exit code: 1 + +>>> errcode $CLI bundle validate -t env-using-an-undefined-variable +Error: variable c is not defined but is assigned a value + +Name: test bundle + +Found 1 error + +Exit code: 1 + +>>> $CLI bundle validate -t env-overrides-lookup -o json +{ + "a": "default-a", + "b": "prod-b", + "d": "4321", + "e": "1234", + "f": "9876" +} diff --git a/acceptance/bundle/variables/env_overrides/script b/acceptance/bundle/variables/env_overrides/script new file mode 100644 index 000000000..30919fd8a --- /dev/null +++ b/acceptance/bundle/variables/env_overrides/script @@ -0,0 +1,6 @@ +trace $CLI bundle validate -t env-with-single-variable-override -o json | jq .workspace.profile +trace $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile +trace BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile +trace errcode $CLI bundle validate -t env-missing-a-required-variable-assignment +trace errcode $CLI bundle validate -t env-using-an-undefined-variable +trace $CLI bundle validate -t env-overrides-lookup -o json | jq '.variables | map_values(.value)' diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json new file mode 100644 index 000000000..602567a68 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json @@ -0,0 +1,5 @@ +{ + "cluster_key": { + "node_type_id": "Standard_DS3_v2" + } +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json new file mode 100644 index 000000000..3a865e120 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json @@ -0,0 +1,7 @@ +{ + "cluster": { + "node_type_id": "Standard_DS3_v2" + }, + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json @@ -0,0 +1 @@ +foo diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json new file mode 100644 index 000000000..1ea719446 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster": "mlops_stacks-cluster" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json new file 
mode 100644 index 000000000..686d68548 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster_key": "mlops_stacks-cluster-from-file" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json new file mode 100644 index 000000000..86166408e --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json @@ -0,0 +1,4 @@ +{ + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json new file mode 100644 index 000000000..de140ba36 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json @@ -0,0 +1,3 @@ +[ + "foo" +] diff --git a/acceptance/bundle/variables/file-defaults/.gitignore b/acceptance/bundle/variables/file-defaults/.gitignore new file mode 100644 index 000000000..bd1711fd1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.gitignore @@ -0,0 +1 @@ +!.databricks diff --git a/acceptance/bundle/variables/file-defaults/databricks.yml b/acceptance/bundle/variables/file-defaults/databricks.yml new file mode 100644 index 000000000..5838843e1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/databricks.yml @@ -0,0 +1,53 @@ +bundle: + name: TestResolveVariablesFromFile + +variables: + cluster: + type: "complex" + cluster_key: + cluster_workers: + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: ${var.cluster_key} + new_cluster: + node_type_id: "${var.cluster.node_type_id}" + num_workers: ${var.cluster_workers} + +targets: + default: + default: true + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + without_defaults: + + complex_to_string: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + string_to_complex: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + wrong_file_structure: + + invalid_json: + + with_value: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: cluster_key_value diff --git a/acceptance/bundle/variables/file-defaults/output.txt b/acceptance/bundle/variables/file-defaults/output.txt new file mode 100644 index 000000000..73830aae3 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/output.txt @@ -0,0 +1,82 @@ + +=== variable file +>>> $CLI bundle validate -o json +{ + "job_cluster_key": "mlops_stacks-cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and variable flag +>>> $CLI bundle validate -o json --var=cluster_key=mlops_stacks-cluster-overriden +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and environment variable +>>> BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable has value in 
config file +>>> $CLI bundle validate -o json --target with_value +{ + "job_cluster_key": "mlops_stacks-cluster-from-file", + "new_cluster": { + "node_type_id": "default", + "num_workers": 1 + } +} + +=== file has variable that is complex but default is string +>>> errcode $CLI bundle validate -o json --target complex_to_string +Error: variable cluster_key is not of type complex, but the value in the variable file is a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== file has variable that is string but default is complex +>>> errcode $CLI bundle validate -o json --target string_to_complex +Error: variable cluster is of type complex, but the value in the variable file is not a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== variable is required but it's not provided in the file +>>> errcode $CLI bundle validate -o json --target without_defaults +Error: no value assigned to required variable cluster. Assignment can be done using "--var", by setting the BUNDLE_VAR_cluster environment variable, or in .databricks/bundle//variable-overrides.json file + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} diff --git a/acceptance/bundle/variables/file-defaults/script b/acceptance/bundle/variables/file-defaults/script new file mode 100644 index 000000000..c5b208755 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/script @@ -0,0 +1,30 @@ +cluster_expr=".resources.jobs.job1.job_clusters[0]" + +# defaults from variable file, see .databricks/bundle//variable-overrides.json + +title "variable file" +trace $CLI bundle validate -o json | jq $cluster_expr + +title "variable file and variable flag" +trace $CLI bundle validate -o json --var="cluster_key=mlops_stacks-cluster-overriden" | jq $cluster_expr + +title "variable file and environment variable" +trace BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json | jq $cluster_expr + +title "variable has value in config file" +trace $CLI bundle validate -o json --target with_value | jq $cluster_expr + +# title "file cannot be parsed" +# trace errcode $CLI bundle validate -o json --target invalid_json | jq $cluster_expr + +# title "file has wrong structure" +# trace errcode $CLI bundle validate -o json --target wrong_file_structure | jq $cluster_expr + +title "file has variable that is complex but default is string" +trace errcode $CLI bundle validate -o json --target complex_to_string | jq $cluster_expr + +title "file has variable that is string but default is complex" +trace errcode $CLI bundle validate -o json --target string_to_complex | jq $cluster_expr + +title "variable is required but it's not provided in the file" +trace errcode $CLI bundle validate -o json --target without_defaults | jq $cluster_expr diff --git a/acceptance/bundle/variables/git-branch/databricks.yml b/acceptance/bundle/variables/git-branch/databricks.yml new file mode 100644 index 000000000..7cf210722 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: git + git: + # This is currently not supported + branch: ${var.deployment_branch} + +variables: + 
deployment_branch: + # By setting deployment_branch to "" we set bundle.git.branch to "" which is the same as unsetting it. + # This should make the CLI read the branch from git and update bundle.git.branch accordingly. It should + # also set bundle.git.inferred to true. + default: "" + +targets: + prod: + default: true + dev: + variables: + deployment_branch: dev-branch diff --git a/acceptance/bundle/variables/git-branch/output.txt b/acceptance/bundle/variables/git-branch/output.txt new file mode 100644 index 000000000..d6d824394 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/output.txt @@ -0,0 +1,98 @@ + +>>> $CLI bundle validate -o json +{ + "bundle": { + "environment": "prod", + "git": { + "actual_branch": "main", + "branch": "", + "bundle_root_path": ".", + }, + "name": "git", + "target": "prod", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "", + "value": "" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/prod", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/state" + } +} + +>>> $CLI bundle validate +Name: git +Target: prod +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/prod + +Validation OK! + +>>> $CLI bundle validate -o json -t dev +{ + "bundle": { + "environment": "dev", + "git": { + "actual_branch": "main", + "branch": "dev-branch", + "bundle_root_path": ".", + }, + "name": "git", + "target": "dev", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "dev-branch", + "value": "dev-branch" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/dev", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/state" + } +} + +>>> $CLI bundle validate -t dev +Name: git +Target: dev +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/dev + +Validation OK!
diff --git a/acceptance/bundle/variables/git-branch/script b/acceptance/bundle/variables/git-branch/script new file mode 100644 index 000000000..aed881f1f --- /dev/null +++ b/acceptance/bundle/variables/git-branch/script @@ -0,0 +1,6 @@ +git-repo-init +trace $CLI bundle validate -o json | grep -v '"commit"' +trace $CLI bundle validate +trace $CLI bundle validate -o json -t dev | grep -v '"commit"' +trace $CLI bundle validate -t dev | grep -v '"commit"' +rm -fr .git diff --git a/acceptance/bundle/variables/host/databricks.yml b/acceptance/bundle/variables/host/databricks.yml new file mode 100644 index 000000000..b25020a1f --- /dev/null +++ b/acceptance/bundle/variables/host/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: host + +variables: + host: + default: https://nonexistent123.staging.cloud.databricks.com + +workspace: + # This is currently not supported + host: ${var.host} diff --git a/acceptance/bundle/variables/host/output.txt b/acceptance/bundle/variables/host/output.txt new file mode 100644 index 000000000..89342908c --- /dev/null +++ b/acceptance/bundle/variables/host/output.txt @@ -0,0 +1,38 @@ + +>>> errcode $CLI bundle validate -o json +Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name + +{ + "bundle": { + "environment": "default", + "name": "host", + "target": "default" + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "host": { + "default": "https://nonexistent123.staging.cloud.databricks.com" + } + }, + "workspace": { + "host": "${var.host}" + } +} +Exit code: 1 + +>>> errcode $CLI bundle validate +Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name + +Name: host +Target: default +Workspace: + Host: ${var.host} + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/host/script b/acceptance/bundle/variables/host/script new file mode 100644 index 000000000..90e083627 --- /dev/null +++ b/acceptance/bundle/variables/host/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate -o json +trace errcode $CLI bundle validate diff --git a/acceptance/bundle/variables/prepend-workspace-var/databricks.yml b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml new file mode 100644 index 000000000..c843752f8 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml @@ -0,0 +1,24 @@ +workspace: + profile: profile_name + root_path: ${var.workspace_root}/path/to/root + +variables: + workspace_root: + description: "root directory in the Databricks workspace to store the asset bundle and associated artifacts" + default: /Users/${workspace.current_user.userName} + +targets: + dev: + default: true + prod: + variables: + workspace_root: /Shared + +resources: + jobs: + my_job: + tasks: + - existing_cluster_id: 500 + python_wheel_task: + named_parameters: + conf-file: "${workspace.file_path}/path/to/config.yaml" diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt new file mode 100644 index 000000000..575fac6d4 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -0,0 +1,67 @@ +/Workspace should be prepended on all paths, but it is not the case: +{ + "bundle": { + "environment": "dev", + "git": { + "bundle_root_path": ".", + "inferred": true + }, + "target": "dev", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "resources": { + "jobs": { + "my_job": { + "deployment": { + "kind": 
"BUNDLE", + "metadata_file_path": "/Users/$USERNAME/path/to/root/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "existing_cluster_id": "500", + "python_wheel_task": { + "named_parameters": { + "conf-file": "/Users/$USERNAME/path/to/root/files/path/to/config.yaml" + } + }, + "task_key": "" + } + ] + } + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "workspace_root": { + "default": "/Users/$USERNAME", + "description": "root directory in the Databricks workspace to store the asset bundle and associated artifacts", + "value": "/Users/$USERNAME" + } + }, + "workspace": { + "artifact_path": "/Users/$USERNAME/path/to/root/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Users/$USERNAME/path/to/root/files", + "profile": "profile_name", + "resource_path": "/Users/$USERNAME/path/to/root/resources", + "root_path": "/Users/$USERNAME/path/to/root", + "state_path": "/Users/$USERNAME/path/to/root/state" + } +} \ No newline at end of file diff --git a/acceptance/bundle/variables/prepend-workspace-var/script b/acceptance/bundle/variables/prepend-workspace-var/script new file mode 100644 index 000000000..de6bc8a17 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/script @@ -0,0 +1,2 @@ +echo /Workspace should be prepended on all paths, but it is not the case: #2181 +$CLI bundle validate -o json diff --git a/acceptance/bundle/variables/resolve-builtin/databricks.yml b/acceptance/bundle/variables/resolve-builtin/databricks.yml new file mode 100644 index 000000000..4bb71c8db --- /dev/null +++ b/acceptance/bundle/variables/resolve-builtin/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: TestResolveVariableReferences + +workspace: + root_path: "${bundle.name}/bar" + file_path: "${workspace.root_path}/baz" diff --git a/acceptance/bundle/variables/resolve-builtin/output.txt b/acceptance/bundle/variables/resolve-builtin/output.txt new file mode 100644 index 000000000..f060c472e --- /dev/null +++ b/acceptance/bundle/variables/resolve-builtin/output.txt @@ -0,0 +1,11 @@ +{ + "artifact_path": "TestResolveVariableReferences/bar/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "TestResolveVariableReferences/bar/baz", + "resource_path": "TestResolveVariableReferences/bar/resources", + "root_path": "TestResolveVariableReferences/bar", + "state_path": "TestResolveVariableReferences/bar/state" +} diff --git a/acceptance/bundle/variables/resolve-builtin/script b/acceptance/bundle/variables/resolve-builtin/script new file mode 100644 index 000000000..fefd9abe6 --- /dev/null +++ b/acceptance/bundle/variables/resolve-builtin/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .workspace diff --git a/acceptance/bundle/variables/resolve-empty/databricks.yml b/acceptance/bundle/variables/resolve-empty/databricks.yml new file mode 100644 index 000000000..7563ada34 --- /dev/null +++ b/acceptance/bundle/variables/resolve-empty/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: TestResolveVariableReferencesToEmptyFields + git: + branch: "" + +resources: + jobs: + job1: + tags: + git_branch: "${bundle.git.branch}" diff --git a/acceptance/bundle/variables/resolve-empty/output.txt b/acceptance/bundle/variables/resolve-empty/output.txt new file mode 100644 index 000000000..a05cbbf54 --- /dev/null +++ 
b/acceptance/bundle/variables/resolve-empty/output.txt @@ -0,0 +1,3 @@ +{ + "git_branch": "" +} diff --git a/acceptance/bundle/variables/resolve-empty/script b/acceptance/bundle/variables/resolve-empty/script new file mode 100644 index 000000000..614673054 --- /dev/null +++ b/acceptance/bundle/variables/resolve-empty/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.tags diff --git a/acceptance/bundle/variables/resolve-field-within-complex/databricks.yml b/acceptance/bundle/variables/resolve-field-within-complex/databricks.yml new file mode 100644 index 000000000..7250dd5df --- /dev/null +++ b/acceptance/bundle/variables/resolve-field-within-complex/databricks.yml @@ -0,0 +1,16 @@ +bundle: + name: TestResolveComplexVariableReferencesToFields + +variables: + cluster: + type: "complex" + default: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + +resources: + jobs: + job1: + job_clusters: + - new_cluster: + node_type_id: "${var.cluster.node_type_id}" diff --git a/acceptance/bundle/variables/resolve-field-within-complex/output.txt b/acceptance/bundle/variables/resolve-field-within-complex/output.txt new file mode 100644 index 000000000..1f6bdbbf4 --- /dev/null +++ b/acceptance/bundle/variables/resolve-field-within-complex/output.txt @@ -0,0 +1,3 @@ +{ + "node_type_id": "Standard_DS3_v2" +} diff --git a/acceptance/bundle/variables/resolve-field-within-complex/script b/acceptance/bundle/variables/resolve-field-within-complex/script new file mode 100644 index 000000000..a885870a5 --- /dev/null +++ b/acceptance/bundle/variables/resolve-field-within-complex/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters[0].new_cluster diff --git a/acceptance/bundle/variables/resolve-nonstrings/databricks.yml b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml new file mode 100644 index 000000000..a02c78a7e --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml @@ -0,0 +1,23 @@ +bundle: + name: TestResolveVariableReferencesForPrimitiveNonStringFields + +variables: + no_alert_for_canceled_runs: {} + no_alert_for_skipped_runs: {} + min_workers: {} + max_workers: {} + spot_bid_max_price: {} + +resources: + jobs: + job1: + notification_settings: + no_alert_for_canceled_runs: ${var.no_alert_for_canceled_runs} + no_alert_for_skipped_runs: ${var.no_alert_for_skipped_runs} + tasks: + - new_cluster: + autoscale: + min_workers: ${var.min_workers} + max_workers: ${var.max_workers} + azure_attributes: + spot_bid_max_price: ${var.spot_bid_max_price} diff --git a/acceptance/bundle/variables/resolve-nonstrings/output.txt b/acceptance/bundle/variables/resolve-nonstrings/output.txt new file mode 100644 index 000000000..3a1eb9c47 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/output.txt @@ -0,0 +1,52 @@ +{ + "variables": { + "max_workers": { + "value": "2" + }, + "min_workers": { + "value": "1" + }, + "no_alert_for_canceled_runs": { + "value": "true" + }, + "no_alert_for_skipped_runs": { + "value": "false" + }, + "spot_bid_max_price": { + "value": "0.5" + } + }, + "jobs": { + "job1": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/TestResolveVariableReferencesForPrimitiveNonStringFields/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "notification_settings": { + "no_alert_for_canceled_runs": true, + "no_alert_for_skipped_runs": false + }, + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + 
"tasks": [ + { + "new_cluster": { + "autoscale": { + "max_workers": 2, + "min_workers": 1 + }, + "azure_attributes": { + "spot_bid_max_price": 0.5 + } + }, + "task_key": "" + } + ] + } + } +} diff --git a/acceptance/bundle/variables/resolve-nonstrings/script b/acceptance/bundle/variables/resolve-nonstrings/script new file mode 100644 index 000000000..cb9e45b61 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/script @@ -0,0 +1,4 @@ +export BUNDLE_VAR_no_alert_for_skipped_runs=false +export BUNDLE_VAR_max_workers=2 +export BUNDLE_VAR_min_workers=3 # shadowed by --var below +$CLI bundle validate -o json --var no_alert_for_canceled_runs=true --var min_workers=1 --var spot_bid_max_price=0.5 | jq '{ variables, jobs: .resources.jobs }' diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml new file mode 100644 index 000000000..6a45de330 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml @@ -0,0 +1,9 @@ +bundle: + name: TestResolveVariableReferencesToBundleVariables + +workspace: + root_path: "${bundle.name}/${var.foo}" + +variables: + foo: + value: "bar" diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt new file mode 100644 index 000000000..c56fbe415 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt @@ -0,0 +1,11 @@ +{ + "artifact_path": "TestResolveVariableReferencesToBundleVariables/bar/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "TestResolveVariableReferencesToBundleVariables/bar/files", + "resource_path": "TestResolveVariableReferencesToBundleVariables/bar/resources", + "root_path": "TestResolveVariableReferencesToBundleVariables/bar", + "state_path": "TestResolveVariableReferencesToBundleVariables/bar/state" +} diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/script b/acceptance/bundle/variables/resolve-vars-in-root-path/script new file mode 100644 index 000000000..fefd9abe6 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .workspace diff --git a/bundle/tests/variables/vanilla/databricks.yml b/acceptance/bundle/variables/vanilla/databricks.yml similarity index 100% rename from bundle/tests/variables/vanilla/databricks.yml rename to acceptance/bundle/variables/vanilla/databricks.yml diff --git a/acceptance/bundle/variables/vanilla/output.txt b/acceptance/bundle/variables/vanilla/output.txt new file mode 100644 index 000000000..e98882bb0 --- /dev/null +++ b/acceptance/bundle/variables/vanilla/output.txt @@ -0,0 +1,16 @@ + +>>> BUNDLE_VAR_b=def $CLI bundle validate -o json +"abc def" + +>>> errcode $CLI bundle validate +Error: no value assigned to required variable b. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file + +Name: ${var.a} ${var.b} +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/${var.a} ${var.b}/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/vanilla/script b/acceptance/bundle/variables/vanilla/script new file mode 100644 index 000000000..10da3183d --- /dev/null +++ b/acceptance/bundle/variables/vanilla/script @@ -0,0 +1,2 @@ +trace BUNDLE_VAR_b=def $CLI bundle validate -o json | jq .bundle.name +trace errcode $CLI bundle validate diff --git a/bundle/tests/variables/variable_overrides_in_target/databricks.yml b/acceptance/bundle/variables/variable_overrides_in_target/databricks.yml similarity index 100% rename from bundle/tests/variables/variable_overrides_in_target/databricks.yml rename to acceptance/bundle/variables/variable_overrides_in_target/databricks.yml diff --git a/acceptance/bundle/variables/variable_overrides_in_target/output.txt b/acceptance/bundle/variables/variable_overrides_in_target/output.txt new file mode 100644 index 000000000..8998b691d --- /dev/null +++ b/acceptance/bundle/variables/variable_overrides_in_target/output.txt @@ -0,0 +1,84 @@ + +>>> $CLI bundle validate -o json -t use-default-variable-values +{ + "pipelines": { + "my_pipeline": { + "clusters": [ + { + "label": "default", + "num_workers": 42 + } + ], + "continuous": true, + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/use-default-variable-values/state/metadata.json" + }, + "name": "a_string", + "permissions": [] + } + } +} + +>>> $CLI bundle validate -o json -t override-string-variable +{ + "pipelines": { + "my_pipeline": { + "clusters": [ + { + "label": "default", + "num_workers": 42 + } + ], + "continuous": true, + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-string-variable/state/metadata.json" + }, + "name": "overridden_string", + "permissions": [] + } + } +} + +>>> $CLI bundle validate -o json -t override-int-variable +{ + "pipelines": { + "my_pipeline": { + "clusters": [ + { + "label": "default", + "num_workers": 43 + } + ], + "continuous": true, + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-int-variable/state/metadata.json" + }, + "name": "a_string", + "permissions": [] + } + } +} + +>>> $CLI bundle validate -o json -t override-both-bool-and-string-variables +{ + "pipelines": { + "my_pipeline": { + "clusters": [ + { + "label": "default", + "num_workers": 42 + } + ], + "continuous": false, + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" + }, + "name": "overridden_string", + "permissions": [] + } + } +} diff --git a/acceptance/bundle/variables/variable_overrides_in_target/script b/acceptance/bundle/variables/variable_overrides_in_target/script new file mode 100644 index 000000000..686b3102a --- /dev/null +++ b/acceptance/bundle/variables/variable_overrides_in_target/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -o json -t use-default-variable-values | jq .resources +trace $CLI bundle validate -o json -t override-string-variable | jq .resources +trace $CLI bundle validate -o json -t override-int-variable | jq .resources +trace $CLI bundle validate -o json -t 
override-both-bool-and-string-variables | jq .resources diff --git a/bundle/tests/variables/without_definition/databricks.yml b/acceptance/bundle/variables/without_definition/databricks.yml similarity index 53% rename from bundle/tests/variables/without_definition/databricks.yml rename to acceptance/bundle/variables/without_definition/databricks.yml index 68227b683..c26a85f56 100644 --- a/bundle/tests/variables/without_definition/databricks.yml +++ b/acceptance/bundle/variables/without_definition/databricks.yml @@ -1,3 +1,5 @@ +bundle: + name: x variables: a: b: diff --git a/acceptance/bundle/variables/without_definition/output.txt b/acceptance/bundle/variables/without_definition/output.txt new file mode 100644 index 000000000..4dd1e6609 --- /dev/null +++ b/acceptance/bundle/variables/without_definition/output.txt @@ -0,0 +1,4 @@ +{ + "a": "foo", + "b": "bar" +} diff --git a/acceptance/bundle/variables/without_definition/script b/acceptance/bundle/variables/without_definition/script new file mode 100644 index 000000000..49b9b5448 --- /dev/null +++ b/acceptance/bundle/variables/without_definition/script @@ -0,0 +1 @@ +BUNDLE_VAR_a=foo BUNDLE_VAR_b=bar $CLI bundle validate -o json | jq '.variables | map_values(.value)' diff --git a/acceptance/cmd_server_test.go b/acceptance/cmd_server_test.go new file mode 100644 index 000000000..28feec1bd --- /dev/null +++ b/acceptance/cmd_server_test.go @@ -0,0 +1,73 @@ +package acceptance_test + +import ( + "encoding/json" + "net/http" + "os" + "strings" + "testing" + + "github.com/databricks/cli/internal/testcli" + "github.com/stretchr/testify/require" +) + +func StartCmdServer(t *testing.T) *TestServer { + server := StartServer(t) + server.Handle("/", func(r *http.Request) (any, error) { + q := r.URL.Query() + args := strings.Split(q.Get("args"), " ") + + var env map[string]string + require.NoError(t, json.Unmarshal([]byte(q.Get("env")), &env)) + + for key, val := range env { + defer Setenv(t, key, val)() + } + + defer Chdir(t, q.Get("cwd"))() + + c := testcli.NewRunner(t, r.Context(), args...) + c.Verbose = false + stdout, stderr, err := c.Run() + result := map[string]any{ + "stdout": stdout.String(), + "stderr": stderr.String(), + } + exitcode := 0 + if err != nil { + exitcode = 1 + } + result["exitcode"] = exitcode + return result, nil + }) + return server +} + +// Chdir variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike testutil.Chdir which switches back only when tests end. +func Chdir(t *testing.T, cwd string) func() { + require.NotEmpty(t, cwd) + prevDir, err := os.Getwd() + require.NoError(t, err) + err = os.Chdir(cwd) + require.NoError(t, err) + return func() { + _ = os.Chdir(prevDir) + } +} + +// Setenv variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike t.Setenv which switches back only when tests end. 
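+//
+// Typical call site (see the handler in StartCmdServer above), with the returned
+// restore function deferred immediately: `defer Setenv(t, key, val)()`.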
+func Setenv(t *testing.T, key, value string) func() { + prevVal, exists := os.LookupEnv(key) + + require.NoError(t, os.Setenv(key, value)) + + return func() { + if exists { + _ = os.Setenv(key, prevVal) + } else { + _ = os.Unsetenv(key) + } + } +} diff --git a/acceptance/config_test.go b/acceptance/config_test.go new file mode 100644 index 000000000..49dce06ba --- /dev/null +++ b/acceptance/config_test.go @@ -0,0 +1,99 @@ +package acceptance_test + +import ( + "os" + "path/filepath" + "sync" + "testing" + + "github.com/BurntSushi/toml" + "github.com/stretchr/testify/require" +) + +const configFilename = "test.toml" + +var ( + configCache map[string]TestConfig + configMutex sync.Mutex +) + +type TestConfig struct { + // Place to describe what's wrong with this test. Does not affect how the test is run. + Badness string + + // Which OSes the test is enabled on. Each string is compared against runtime.GOOS. + // If absent, default to true. + GOOS map[string]bool +} + +// FindConfig finds the closest config file. +func FindConfig(t *testing.T, dir string) (string, bool) { + shared := false + for { + path := filepath.Join(dir, configFilename) + _, err := os.Stat(path) + + if err == nil { + return path, shared + } + + shared = true + + if dir == "" || dir == "." { + break + } + + if os.IsNotExist(err) { + dir = filepath.Dir(dir) + continue + } + + t.Fatalf("Error while reading %s: %s", path, err) + } + + t.Fatal("Config not found: " + configFilename) + return "", shared +} + +// LoadConfig loads the config file. Non-leaf configs are cached. +func LoadConfig(t *testing.T, dir string) (TestConfig, string) { + path, leafConfig := FindConfig(t, dir) + + if leafConfig { + return DoLoadConfig(t, path), path + } + + configMutex.Lock() + defer configMutex.Unlock() + + if configCache == nil { + configCache = make(map[string]TestConfig) + } + + result, ok := configCache[path] + if ok { + return result, path + } + + result = DoLoadConfig(t, path) + configCache[path] = result + return result, path +} + +func DoLoadConfig(t *testing.T, path string) TestConfig { + bytes, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read config: %s", err) + } + + var config TestConfig + meta, err := toml.Decode(string(bytes), &config) + require.NoError(t, err) + + keys := meta.Undecoded() + if len(keys) > 0 { + t.Fatalf("Undecoded keys in %s: %#v", path, keys) + } + + return config +} diff --git a/acceptance/help/output.txt b/acceptance/help/output.txt new file mode 100644 index 000000000..18434251d --- /dev/null +++ b/acceptance/help/output.txt @@ -0,0 +1,142 @@ +Databricks CLI + +Usage: + databricks [command] + +Databricks Workspace + fs Filesystem related commands + git-credentials Registers personal access token for Databricks to do operations on behalf of the user. + repos The Repos API allows users to manage their git repos. + secrets The Secrets API allows you to manage secrets, secret scopes, and access permissions. + workspace The Workspace API allows you to list, import, export, and delete notebooks and folders. + +Compute + cluster-policies You can use cluster policies to control users' ability to configure clusters based on a set of rules. + clusters The Clusters API allows you to create, start, edit, list, terminate, and delete clusters. + global-init-scripts The Global Init Scripts API enables Workspace administrators to configure global initialization scripts for their workspace. 
+ instance-pools Instance Pools API are used to create, edit, delete and list instance pools by using ready-to-use cloud instances which reduces a cluster start and auto-scaling times. + instance-profiles The Instance Profiles API allows admins to add, list, and remove instance profiles that users can launch clusters with. + libraries The Libraries API allows you to install and uninstall libraries and get the status of libraries on a cluster. + policy-compliance-for-clusters The policy compliance APIs allow you to view and manage the policy compliance status of clusters in your workspace. + policy-families View available policy families. + +Workflows + jobs The Jobs API allows you to create, edit, and delete jobs. + policy-compliance-for-jobs The compliance APIs allow you to view and manage the policy compliance status of jobs in your workspace. + +Delta Live Tables + pipelines The Delta Live Tables API allows you to create, edit, delete, start, and view details about pipelines. + +Machine Learning + experiments Experiments are the primary unit of organization in MLflow; all MLflow runs belong to an experiment. + model-registry Note: This API reference documents APIs for the Workspace Model Registry. + +Real-time Serving + serving-endpoints The Serving Endpoints API allows you to create, update, and delete model serving endpoints. + +Identity and Access Management + current-user This API allows retrieving information about currently authenticated user or service principal. + groups Groups simplify identity management, making it easier to assign access to Databricks workspace, data, and other securable objects. + permissions Permissions API are used to create read, write, edit, update and manage access for various users on different objects and endpoints. + service-principals Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms. + users User identities recognized by Databricks and represented by email addresses. + +Databricks SQL + alerts The alerts API can be used to perform CRUD operations on alerts. + alerts-legacy The alerts API can be used to perform CRUD operations on alerts. + dashboards In general, there is little need to modify dashboards using the API. + data-sources This API is provided to assist you in making new query objects. + queries The queries API can be used to perform CRUD operations on queries. + queries-legacy These endpoints are used for CRUD operations on query definitions. + query-history A service responsible for storing and retrieving the list of queries run against SQL endpoints and serverless compute. + warehouses A SQL warehouse is a compute resource that lets you run SQL commands on data objects within Databricks SQL. + +Unity Catalog + artifact-allowlists In Databricks Runtime 13.3 and above, you can add libraries and init scripts to the allowlist in UC so that users can leverage these artifacts on compute configured with shared access mode. + catalogs A catalog is the first layer of Unity Catalog’s three-level namespace. + connections Connections allow for creating a connection to an external data source. + credentials A credential represents an authentication and authorization mechanism for accessing services on your cloud tenant. + external-locations An external location is an object that combines a cloud storage path with a storage credential that authorizes access to the cloud storage path. + functions Functions implement User-Defined Functions (UDFs) in Unity Catalog. 
+ grants In Unity Catalog, data is secure by default. + metastores A metastore is the top-level container of objects in Unity Catalog. + model-versions Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. + online-tables Online tables provide lower latency and higher QPS access to data from Delta tables. + quality-monitors A monitor computes and monitors data or model quality metrics for a table over time. + registered-models Databricks provides a hosted version of MLflow Model Registry in Unity Catalog. + resource-quotas Unity Catalog enforces resource quotas on all securable objects, which limits the number of resources that can be created. + schemas A schema (also called a database) is the second layer of Unity Catalog’s three-level namespace. + storage-credentials A storage credential represents an authentication and authorization mechanism for accessing data stored on your cloud tenant. + system-schemas A system schema is a schema that lives within the system catalog. + table-constraints Primary key and foreign key constraints encode relationships between fields in tables. + tables A table resides in the third layer of Unity Catalog’s three-level namespace. + temporary-table-credentials Temporary Table Credentials refer to short-lived, downscoped credentials used to access cloud storage locationswhere table data is stored in Databricks. + volumes Volumes are a Unity Catalog (UC) capability for accessing, storing, governing, organizing and processing files. + workspace-bindings A securable in Databricks can be configured as __OPEN__ or __ISOLATED__. + +Delta Sharing + providers A data provider is an object representing the organization in the real world who shares the data. + recipient-activation The Recipient Activation API is only applicable in the open sharing model where the recipient object has the authentication type of TOKEN. + recipients A recipient is an object you create using :method:recipients/create to represent an organization which you want to allow access shares. + shares A share is a container instantiated with :method:shares/create. + +Settings + ip-access-lists IP Access List enables admins to configure IP access lists. + notification-destinations The notification destinations API lets you programmatically manage a workspace's notification destinations. + settings Workspace Settings API allows users to manage settings at the workspace level. + token-management Enables administrators to get all tokens and delete tokens for other users. + tokens The Token API allows you to create, list, and revoke tokens that can be used to authenticate and access Databricks REST APIs. + workspace-conf This API allows updating known workspace settings for advanced users. + +Developer Tools + bundle Databricks Asset Bundles let you express data/AI/analytics projects as code. + sync Synchronize a local directory to a workspace directory + +Vector Search + vector-search-endpoints **Endpoint**: Represents the compute resources to host vector search indexes. + vector-search-indexes **Index**: An efficient representation of your embedding vectors that supports real-time and efficient approximate nearest neighbor (ANN) search queries. + +Dashboards + lakeview These APIs provide specific management operations for Lakeview dashboards. + +Marketplace + consumer-fulfillments Fulfillments are entities that allow consumers to preview installations. + consumer-installations Installations are entities that allow consumers to interact with Databricks Marketplace listings. 
+ consumer-listings Listings are the core entities in the Marketplace. + consumer-personalization-requests Personalization Requests allow customers to interact with the individualized Marketplace listing flow. + consumer-providers Providers are the entities that publish listings to the Marketplace. + provider-exchange-filters Marketplace exchanges filters curate which groups can access an exchange. + provider-exchanges Marketplace exchanges allow providers to share their listings with a curated set of customers. + provider-files Marketplace offers a set of file APIs for various purposes such as preview notebooks and provider icons. + provider-listings Listings are the core entities in the Marketplace. + provider-personalization-requests Personalization requests are an alternate to instantly available listings. + provider-provider-analytics-dashboards Manage templated analytics solution for providers. + provider-providers Providers are entities that manage assets in Marketplace. + +Apps + apps Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on. + +Clean Rooms + clean-room-assets Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room. + clean-room-task-runs Clean room task runs are the executions of notebooks in a clean room. + clean-rooms A clean room uses Delta Sharing and serverless compute to provide a secure and privacy-protecting environment where multiple parties can work together on sensitive enterprise data without direct access to each other’s data. + +Additional Commands: + account Databricks Account Commands + api Perform Databricks API call + auth Authentication related commands + completion Generate the autocompletion script for the specified shell + configure Configure authentication + help Help about any command + labs Manage Databricks Labs installations + version Retrieve information about the current version of this CLI + +Flags: + --debug enable debug logging + -h, --help help for databricks + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + -v, --version version for databricks + +Use "databricks [command] --help" for more information about a command. diff --git a/acceptance/help/script b/acceptance/help/script new file mode 100644 index 000000000..5fa569470 --- /dev/null +++ b/acceptance/help/script @@ -0,0 +1 @@ +$CLI diff --git a/acceptance/script.cleanup b/acceptance/script.cleanup new file mode 100644 index 000000000..3c3e29ebc --- /dev/null +++ b/acceptance/script.cleanup @@ -0,0 +1 @@ +rm -fr .databricks .gitignore diff --git a/acceptance/script.prepare b/acceptance/script.prepare new file mode 100644 index 000000000..87910654d --- /dev/null +++ b/acceptance/script.prepare @@ -0,0 +1,49 @@ +errcode() { + # Temporarily disable 'set -e' to prevent the script from exiting on error + set +e + # Execute the provided command with all arguments + "$@" + local exit_code=$? 
+ # Re-enable 'set -e' if it was previously set + set -e + if [ $exit_code -ne 0 ]; then + >&2 printf "\nExit code: $exit_code\n" + fi +} + +trace() { + >&2 printf "\n>>> %s\n" "$*" + + if [[ "$1" == *"="* ]]; then + # If the first argument contains '=', collect all env vars + local env_vars=() + while [[ "$1" == *"="* ]]; do + env_vars+=("$1") + shift + done + # Export environment variables in a subshell and execute the command + ( + export "${env_vars[@]}" + "$@" + ) + else + # Execute the command normally + "$@" + fi + + return $? +} + +git-repo-init() { + git init -qb main + git config core.autocrlf false + git config user.name "Tester" + git config user.email "tester@databricks.com" + git add databricks.yml + git commit -qm 'Add databricks.yml' +} + +title() { + local label="$1" + printf "\n=== %s" "$label" +} diff --git a/acceptance/server_test.go b/acceptance/server_test.go new file mode 100644 index 000000000..eb8cbb24a --- /dev/null +++ b/acceptance/server_test.go @@ -0,0 +1,149 @@ +package acceptance_test + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +type TestServer struct { + *httptest.Server + Mux *http.ServeMux +} + +type HandlerFunc func(r *http.Request) (any, error) + +func NewTestServer() *TestServer { + mux := http.NewServeMux() + server := httptest.NewServer(mux) + + return &TestServer{ + Server: server, + Mux: mux, + } +} + +func (s *TestServer) Handle(pattern string, handler HandlerFunc) { + s.Mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) { + resp, err := handler(r) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + + var respBytes []byte + + respString, ok := resp.(string) + if ok { + respBytes = []byte(respString) + } else { + respBytes, err = json.MarshalIndent(resp, "", " ") + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + + if _, err := w.Write(respBytes); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + }) +} + +func StartServer(t *testing.T) *TestServer { + server := NewTestServer() + t.Cleanup(func() { + server.Close() + }) + return server +} + +func AddHandlers(server *TestServer) { + server.Handle("GET /api/2.0/policies/clusters/list", func(r *http.Request) (any, error) { + return compute.ListPoliciesResponse{ + Policies: []compute.Policy{ + { + PolicyId: "5678", + Name: "wrong-cluster-policy", + }, + { + PolicyId: "9876", + Name: "some-test-cluster-policy", + }, + }, + }, nil + }) + + server.Handle("GET /api/2.0/instance-pools/list", func(r *http.Request) (any, error) { + return compute.ListInstancePools{ + InstancePools: []compute.InstancePoolAndStats{ + { + InstancePoolName: "some-test-instance-pool", + InstancePoolId: "1234", + }, + }, + }, nil + }) + + server.Handle("GET /api/2.1/clusters/list", func(r *http.Request) (any, error) { + return compute.ListClustersResponse{ + Clusters: []compute.ClusterDetails{ + { + ClusterName: "some-test-cluster", + ClusterId: "4321", + }, + { + ClusterName: "some-other-cluster", + ClusterId: "9876", + }, + }, + }, nil + }) + + server.Handle("GET /api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) { + return iam.User{ + UserName: 
"tester@databricks.com", + }, nil + }) + + server.Handle("GET /api/2.0/workspace/get-status", func(r *http.Request) (any, error) { + return workspace.ObjectInfo{ + ObjectId: 1001, + ObjectType: "DIRECTORY", + Path: "", + ResourceId: "1001", + }, nil + }) + + server.Handle("GET /api/2.1/unity-catalog/current-metastore-assignment", func(r *http.Request) (any, error) { + return catalog.MetastoreAssignment{ + DefaultCatalogName: "main", + }, nil + }) + + server.Handle("GET /api/2.0/permissions/directories/1001", func(r *http.Request) (any, error) { + return workspace.WorkspaceObjectPermissions{ + ObjectId: "1001", + ObjectType: "DIRECTORY", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "tester@databricks.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + { + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + }, + }, nil + }) +} diff --git a/acceptance/test.toml b/acceptance/test.toml new file mode 100644 index 000000000..eee94d0ea --- /dev/null +++ b/acceptance/test.toml @@ -0,0 +1,2 @@ +# If test directory nor any of its parents do not have test.toml then this file serves as fallback configuration. +# The configurations are not merged across parents; the closest one is used fully. diff --git a/bundle/apps/interpolate_variables.go b/bundle/apps/interpolate_variables.go new file mode 100644 index 000000000..f88e7e9db --- /dev/null +++ b/bundle/apps/interpolate_variables.go @@ -0,0 +1,50 @@ +package apps + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" +) + +type interpolateVariables struct{} + +func (i *interpolateVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + pattern := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("apps"), + dyn.AnyKey(), + dyn.Key("config"), + ) + + tfToConfigMap := map[string]string{} + for k, r := range config.SupportedResources() { + tfToConfigMap[r.TerraformResourceName] = k + } + + err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { + return dyn.MapByPattern(root, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + return dynvar.Resolve(v, func(path dyn.Path) (dyn.Value, error) { + key, ok := tfToConfigMap[path[0].Key()] + if ok { + path = dyn.NewPath(dyn.Key("resources"), dyn.Key(key)).Append(path[1:]...) 
+ } + + return dyn.GetByPath(root, path) + }) + }) + }) + + return diag.FromErr(err) +} + +func (i *interpolateVariables) Name() string { + return "apps.InterpolateVariables" +} + +func InterpolateVariables() bundle.Mutator { + return &interpolateVariables{} +} diff --git a/bundle/apps/interpolate_variables_test.go b/bundle/apps/interpolate_variables_test.go new file mode 100644 index 000000000..b6c424a95 --- /dev/null +++ b/bundle/apps/interpolate_variables_test.go @@ -0,0 +1,49 @@ +package apps + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/require" +) + +func TestAppInterpolateVariables(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "my_app_1": { + App: &apps.App{ + Name: "my_app_1", + }, + Config: map[string]any{ + "command": []string{"echo", "hello"}, + "env": []map[string]string{ + {"name": "JOB_ID", "value": "${databricks_job.my_job.id}"}, + }, + }, + }, + "my_app_2": { + App: &apps.App{ + Name: "my_app_2", + }, + }, + }, + Jobs: map[string]*resources.Job{ + "my_job": { + ID: "123", + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, InterpolateVariables()) + require.Empty(t, diags) + require.Equal(t, []any{map[string]any{"name": "JOB_ID", "value": "123"}}, b.Config.Resources.Apps["my_app_1"].Config["env"]) + require.Nil(t, b.Config.Resources.Apps["my_app_2"].Config) +} diff --git a/bundle/apps/slow_deploy_message.go b/bundle/apps/slow_deploy_message.go new file mode 100644 index 000000000..87275980a --- /dev/null +++ b/bundle/apps/slow_deploy_message.go @@ -0,0 +1,29 @@ +package apps + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" +) + +type slowDeployMessage struct{} + +// TODO: needs to be removed when when no_compute option becomes available in TF provider and used in DABs +// See https://github.com/databricks/cli/pull/2144 +func (v *slowDeployMessage) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if len(b.Config.Resources.Apps) > 0 { + cmdio.LogString(ctx, "Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning.") + } + + return nil +} + +func (v *slowDeployMessage) Name() string { + return "apps.SlowDeployMessage" +} + +func SlowDeployMessage() bundle.Mutator { + return &slowDeployMessage{} +} diff --git a/bundle/apps/upload_config.go b/bundle/apps/upload_config.go new file mode 100644 index 000000000..5c58c5c6f --- /dev/null +++ b/bundle/apps/upload_config.go @@ -0,0 +1,97 @@ +package apps + +import ( + "bytes" + "context" + "fmt" + "path" + "strings" + "sync" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/deploy" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/filer" + "golang.org/x/sync/errgroup" + + "gopkg.in/yaml.v3" +) + +type uploadConfig struct { + filerFactory deploy.FilerFactory +} + +func (u *uploadConfig) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + errGroup, ctx := errgroup.WithContext(ctx) + + mu := sync.Mutex{} + for key, app := range b.Config.Resources.Apps { + // If the app has a config, we need to deploy it first. 
+ // It means we need to write app.yml file with the content of the config field + // to the remote source code path of the app. + if app.Config != nil { + appPath := strings.TrimPrefix(app.SourceCodePath, b.Config.Workspace.FilePath) + + buf, err := configToYaml(app) + if err != nil { + return diag.FromErr(err) + } + + f, err := u.filerFactory(b) + if err != nil { + return diag.FromErr(err) + } + + errGroup.Go(func() error { + err := f.Write(ctx, path.Join(appPath, "app.yml"), buf, filer.OverwriteIfExists) + if err != nil { + mu.Lock() + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Failed to save config", + Detail: fmt.Sprintf("Failed to write %s file: %s", path.Join(app.SourceCodePath, "app.yml"), err), + Locations: b.Config.GetLocations("resources.apps." + key), + }) + mu.Unlock() + } + return nil + }) + } + } + + if err := errGroup.Wait(); err != nil { + return diags.Extend(diag.FromErr(err)) + } + + return diags +} + +// Name implements bundle.Mutator. +func (u *uploadConfig) Name() string { + return "apps:UploadConfig" +} + +func UploadConfig() bundle.Mutator { + return &uploadConfig{ + filerFactory: func(b *bundle.Bundle) (filer.Filer, error) { + return filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.FilePath) + }, + } +} + +func configToYaml(app *resources.App) (*bytes.Buffer, error) { + buf := bytes.NewBuffer(nil) + enc := yaml.NewEncoder(buf) + enc.SetIndent(2) + + err := enc.Encode(app.Config) + defer enc.Close() + + if err != nil { + return nil, fmt.Errorf("failed to encode app config to yaml: %w", err) + } + + return buf, nil +} diff --git a/bundle/apps/upload_config_test.go b/bundle/apps/upload_config_test.go new file mode 100644 index 000000000..a1a6b3afb --- /dev/null +++ b/bundle/apps/upload_config_test.go @@ -0,0 +1,75 @@ +package apps + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + mockfiler "github.com/databricks/cli/internal/mocks/libs/filer" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestAppUploadConfig(t *testing.T) { + root := t.TempDir() + err := os.MkdirAll(filepath.Join(root, "my_app"), 0o700) + require.NoError(t, err) + + b := &bundle.Bundle{ + BundleRootPath: root, + SyncRootPath: root, + SyncRoot: vfs.MustNew(root), + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com/", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "my_app": { + App: &apps.App{ + Name: "my_app", + }, + SourceCodePath: "./my_app", + Config: map[string]any{ + "command": []string{"echo", "hello"}, + "env": []map[string]string{ + {"name": "MY_APP", "value": "my value"}, + }, + }, + }, + }, + }, + }, + } + + mockFiler := mockfiler.NewMockFiler(t) + mockFiler.EXPECT().Write(mock.Anything, "my_app/app.yml", bytes.NewBufferString(`command: + - echo + - hello +env: + - name: MY_APP + value: my value +`), filer.OverwriteIfExists).Return(nil) + + u := uploadConfig{ + filerFactory: func(b *bundle.Bundle) (filer.Filer, error) { + return mockFiler, nil + }, + } + + bundletest.SetLocation(b, ".", 
[]dyn.Location{{File: filepath.Join(root, "databricks.yml")}}) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), &u)) + require.NoError(t, diags.Error()) +} diff --git a/bundle/apps/validate.go b/bundle/apps/validate.go new file mode 100644 index 000000000..fc50aeafc --- /dev/null +++ b/bundle/apps/validate.go @@ -0,0 +1,53 @@ +package apps + +import ( + "context" + "fmt" + "path" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +type validate struct{} + +func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + possibleConfigFiles := []string{"app.yml", "app.yaml"} + usedSourceCodePaths := make(map[string]string) + + for key, app := range b.Config.Resources.Apps { + if _, ok := usedSourceCodePaths[app.SourceCodePath]; ok { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Duplicate app source code path", + Detail: fmt.Sprintf("app resource '%s' has the same source code path as app resource '%s', this will lead to the app configuration being overriden by each other", key, usedSourceCodePaths[app.SourceCodePath]), + Locations: b.Config.GetLocations(fmt.Sprintf("resources.apps.%s.source_code_path", key)), + }) + } + usedSourceCodePaths[app.SourceCodePath] = key + + for _, configFile := range possibleConfigFiles { + appPath := strings.TrimPrefix(app.SourceCodePath, b.Config.Workspace.FilePath) + cf := path.Join(appPath, configFile) + if _, err := b.SyncRoot.Stat(cf); err == nil { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: configFile + " detected", + Detail: fmt.Sprintf("remove %s and use 'config' property for app resource '%s' instead", cf, app.Name), + }) + } + } + } + + return diags +} + +func (v *validate) Name() string { + return "apps.Validate" +} + +func Validate() bundle.Mutator { + return &validate{} +} diff --git a/bundle/apps/validate_test.go b/bundle/apps/validate_test.go new file mode 100644 index 000000000..6c3a88191 --- /dev/null +++ b/bundle/apps/validate_test.go @@ -0,0 +1,97 @@ +package apps + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/require" +) + +func TestAppsValidate(t *testing.T) { + tmpDir := t.TempDir() + testutil.Touch(t, tmpDir, "app1", "app.yml") + testutil.Touch(t, tmpDir, "app2", "app.py") + + b := &bundle.Bundle{ + BundleRootPath: tmpDir, + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/foo/bar/", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "app1": { + App: &apps.App{ + Name: "app1", + }, + SourceCodePath: "./app1", + }, + "app2": { + App: &apps.App{ + Name: "app2", + }, + SourceCodePath: "./app2", + }, + }, + }, + }, + } + + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate())) + require.Len(t, diags, 1) + require.Equal(t, "app.yml detected", diags[0].Summary) 
+ require.Contains(t, diags[0].Detail, "app.yml and use 'config' property for app resource") +} + +func TestAppsValidateSameSourcePath(t *testing.T) { + tmpDir := t.TempDir() + testutil.Touch(t, tmpDir, "app1", "app.py") + + b := &bundle.Bundle{ + BundleRootPath: tmpDir, + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/foo/bar/", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "app1": { + App: &apps.App{ + Name: "app1", + }, + SourceCodePath: "./app1", + }, + "app2": { + App: &apps.App{ + Name: "app2", + }, + SourceCodePath: "./app1", + }, + }, + }, + }, + } + + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate())) + require.Len(t, diags, 1) + require.Equal(t, "Duplicate app source code path", diags[0].Summary) + require.Contains(t, diags[0].Detail, "has the same source code path as app resource") +} diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index 7d44db0be..c0af7c69e 100644 --- a/bundle/artifacts/expand_globs.go +++ b/bundle/artifacts/expand_globs.go @@ -97,7 +97,7 @@ func (m *expandGlobs) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost return dyn.SetByPath(v, base, dyn.V(output)) }) if err != nil { - return diag.FromErr(err) + diags = diags.Extend(diag.FromErr(err)) } return diags diff --git a/bundle/artifacts/expand_globs_test.go b/bundle/artifacts/expand_globs_test.go index dc7c77de7..264c52c50 100644 --- a/bundle/artifacts/expand_globs_test.go +++ b/bundle/artifacts/expand_globs_test.go @@ -2,7 +2,6 @@ package artifacts import ( "context" - "fmt" "path/filepath" "testing" @@ -88,16 +87,16 @@ func TestExpandGlobs_InvalidPattern(t *testing.T) { )) assert.Len(t, diags, 4) - assert.Equal(t, fmt.Sprintf("%s: syntax error in pattern", filepath.Clean("a[.txt")), diags[0].Summary) + assert.Equal(t, filepath.Clean("a[.txt")+": syntax error in pattern", diags[0].Summary) assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[0].Locations[0].File) assert.Equal(t, "artifacts.test.files[0].source", diags[0].Paths[0].String()) - assert.Equal(t, fmt.Sprintf("%s: syntax error in pattern", filepath.Clean("a[.txt")), diags[1].Summary) + assert.Equal(t, filepath.Clean("a[.txt")+": syntax error in pattern", diags[1].Summary) assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[1].Locations[0].File) assert.Equal(t, "artifacts.test.files[1].source", diags[1].Paths[0].String()) - assert.Equal(t, fmt.Sprintf("%s: syntax error in pattern", filepath.Clean("../a[.txt")), diags[2].Summary) + assert.Equal(t, filepath.Clean("../a[.txt")+": syntax error in pattern", diags[2].Summary) assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[2].Locations[0].File) assert.Equal(t, "artifacts.test.files[2].source", diags[2].Paths[0].String()) - assert.Equal(t, fmt.Sprintf("%s: syntax error in pattern", filepath.Clean("subdir/a[.txt")), diags[3].Summary) + assert.Equal(t, filepath.Clean("subdir/a[.txt")+": syntax error in pattern", diags[3].Summary) assert.Equal(t, filepath.Join(tmpDir, "databricks.yml"), diags[3].Locations[0].File) assert.Equal(t, "artifacts.test.files[3].source", diags[3].Paths[0].String()) } diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go index 604bfc449..9c40360be 100644 --- a/bundle/artifacts/whl/infer.go +++ b/bundle/artifacts/whl/infer.go @@ -32,7 +32,7 @@ 
func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { //) py := python.GetExecutable() - artifact.BuildCommand = fmt.Sprintf(`%s setup.py bdist_wheel`, py) + artifact.BuildCommand = py + " setup.py bdist_wheel" return nil } diff --git a/bundle/bundle.go b/bundle/bundle.go index 573bcef2f..e715b8b2c 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -8,6 +8,7 @@ package bundle import ( "context" + "errors" "fmt" "os" "path/filepath" @@ -16,6 +17,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/locker" "github.com/databricks/cli/libs/log" @@ -23,7 +25,6 @@ import ( "github.com/databricks/cli/libs/terraform" "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" - sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/hashicorp/terraform-exec/tfexec" ) @@ -56,6 +57,9 @@ type Bundle struct { // It is loaded from the bundle configuration files and mutators may update it. Config config.Root + // Target stores a snapshot of the Root.Bundle.Target configuration when it was selected by SelectTarget. + Target *config.Target `json:"target_config,omitempty" bundle:"internal"` + // Metadata about the bundle deployment. This is the interface Databricks services // rely on to integrate with bundles when they need additional information about // a bundle deployment. @@ -234,25 +238,9 @@ func (b *Bundle) GetSyncIncludePatterns(ctx context.Context) ([]string, error) { // we call into from this bundle context. func (b *Bundle) AuthEnv() (map[string]string, error) { if b.client == nil { - return nil, fmt.Errorf("workspace client not initialized yet") + return nil, errors.New("workspace client not initialized yet") } cfg := b.client.Config - out := make(map[string]string) - for _, attr := range sdkconfig.ConfigAttributes { - // Ignore profile so that downstream tools don't try and reload - // the profile even though we know the current configuration is valid. 
- if attr.Name == "profile" { - continue - } - if len(attr.EnvVars) == 0 { - continue - } - if attr.IsZero(cfg) { - continue - } - out[attr.EnvVars[0]] = attr.GetString(cfg) - } - - return out, nil + return auth.Env(cfg), nil } diff --git a/bundle/bundle_test.go b/bundle/bundle_test.go index 1c3102357..d52088e34 100644 --- a/bundle/bundle_test.go +++ b/bundle/bundle_test.go @@ -2,7 +2,6 @@ package bundle import ( "context" - "errors" "io/fs" "os" "path/filepath" @@ -16,7 +15,7 @@ import ( func TestLoadNotExists(t *testing.T) { b, err := Load(context.Background(), "/doesntexist") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) assert.Nil(t, b) } diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index 9a5690f57..177799e11 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -2,7 +2,7 @@ package config import ( "context" - "fmt" + "errors" "github.com/databricks/cli/libs/exec" ) @@ -37,7 +37,7 @@ type Artifact struct { func (a *Artifact) Build(ctx context.Context) ([]byte, error) { if a.BuildCommand == "" { - return nil, fmt.Errorf("no build property defined") + return nil, errors.New("no build property defined") } var e *exec.Executor diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 4c787168f..7ecac5d7d 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -27,9 +27,33 @@ type Experimental struct { // PyDABs determines whether to load the 'databricks-pydabs' package. // // PyDABs allows to define bundle configuration using Python. + // PyDABs is deprecated use Python instead. PyDABs PyDABs `json:"pydabs,omitempty"` + + // Python configures loading of Python code defined with 'databricks-bundles' package. + Python Python `json:"python,omitempty"` } +type Python struct { + // Resources contains a list of fully qualified function paths to load resources + // defined in Python code. + // + // Example: ["my_project.resources:load_resources"] + Resources []string `json:"resources"` + + // Mutators contains a list of fully qualified function paths to mutator functions. + // + // Example: ["my_project.mutators:add_default_cluster"] + Mutators []string `json:"mutators"` + + // VEnvPath is path to the virtual environment. + // + // If enabled, Python code will execute within this environment. If disabled, + // it defaults to using the Python interpreter available in the current shell. + VEnvPath string `json:"venv_path,omitempty"` +} + +// PyDABs is deprecated use Python instead type PyDABs struct { // Enabled is a flag to enable the feature. Enabled bool `json:"enabled,omitempty"` diff --git a/bundle/config/generate/app.go b/bundle/config/generate/app.go new file mode 100644 index 000000000..1255d63f8 --- /dev/null +++ b/bundle/config/generate/app.go @@ -0,0 +1,37 @@ +package generate + +import ( + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +func ConvertAppToValue(app *apps.App, sourceCodePath string, appConfig map[string]any) (dyn.Value, error) { + ac, err := convert.FromTyped(appConfig, dyn.NilValue) + if err != nil { + return dyn.NilValue, err + } + + ar, err := convert.FromTyped(app.Resources, dyn.NilValue) + if err != nil { + return dyn.NilValue, err + } + + // The majority of fields of the app struct are read-only. + // We copy the relevant fields manually. 
+ dv := map[string]dyn.Value{ + "name": dyn.NewValue(app.Name, []dyn.Location{{Line: 1}}), + "description": dyn.NewValue(app.Description, []dyn.Location{{Line: 2}}), + "source_code_path": dyn.NewValue(sourceCodePath, []dyn.Location{{Line: 3}}), + } + + if ac.Kind() != dyn.KindNil { + dv["config"] = ac.WithLocations([]dyn.Location{{Line: 4}}) + } + + if ar.Kind() != dyn.KindNil { + dv["resources"] = ar.WithLocations([]dyn.Location{{Line: 5}}) + } + + return dyn.V(dv), nil +} diff --git a/bundle/config/loader/process_root_includes.go b/bundle/config/loader/process_root_includes.go index c608a3de6..198095742 100644 --- a/bundle/config/loader/process_root_includes.go +++ b/bundle/config/loader/process_root_includes.go @@ -2,6 +2,7 @@ package loader import ( "context" + "fmt" "path/filepath" "slices" "strings" @@ -36,6 +37,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Maintain list of files in order of files being loaded. // This is stored in the bundle configuration for observability. var files []string + var diags diag.Diagnostics // For each glob, find all files to load. // Ordering of the list of globs is maintained in the output. @@ -60,7 +62,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Filter matches to ones we haven't seen yet. var includes []string - for _, match := range matches { + for i, match := range matches { rel, err := filepath.Rel(b.BundleRootPath, match) if err != nil { return diag.FromErr(err) @@ -69,9 +71,22 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. continue } seen[rel] = true + if filepath.Ext(rel) != ".yaml" && filepath.Ext(rel) != ".yml" { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: "Files in the 'include' configuration section must be YAML files.", + Detail: fmt.Sprintf("The file %s in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead.", rel), + Locations: b.Config.GetLocations(fmt.Sprintf("include[%d]", i)), + }) + continue + } includes = append(includes, rel) } + if len(diags) > 0 { + return diags + } + // Add matches to list of mutators to return. slices.Sort(includes) files = append(files, includes...) 
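The new diagnostic above only rejects non-YAML entries under the top-level `include` section; files that merely need to be synced belong under `sync.include`, as the detail message itself suggests. A minimal sketch of the two shapes (file paths are illustrative, not taken from this change):

# Rejected: 'include' may only list YAML configuration files.
include:
  - resources/*.yml
  - scripts/helper.py   # -> "Files in the 'include' configuration section must be YAML files."

# Suggested alternative for non-YAML files that should be synced to the workspace.
sync:
  include:
    - scripts/helper.py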
diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 381703756..b402053e7 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -9,7 +9,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/textutil" @@ -222,26 +221,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos dashboard.DisplayName = prefix + dashboard.DisplayName } - if config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) { - isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/") - if !isDatabricksWorkspace { - target := b.Config.Bundle.Target - path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("presets"), dyn.Key("source_linked_deployment")) - diags = diags.Append( - diag.Diagnostic{ - Severity: diag.Warning, - Summary: "source-linked deployment is available only in the Databricks Workspace", - Paths: []dyn.Path{ - path, - }, - Locations: b.Config.GetLocations(path[2:].String()), - }, - ) - - disabled := false - b.Config.Presets.SourceLinkedDeployment = &disabled - } - } + // Apps: No presets return diags } diff --git a/bundle/config/mutator/apply_presets_test.go b/bundle/config/mutator/apply_presets_test.go index c26f20383..5e3f942cc 100644 --- a/bundle/config/mutator/apply_presets_test.go +++ b/bundle/config/mutator/apply_presets_test.go @@ -2,16 +2,12 @@ package mutator_test import ( "context" - "runtime" "testing" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/internal/bundletest" - "github.com/databricks/cli/libs/dbr" - "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" @@ -398,87 +394,3 @@ func TestApplyPresetsResourceNotDefined(t *testing.T) { }) } } - -func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") - } - - testContext := context.Background() - enabled := true - disabled := false - workspacePath := "/Workspace/user.name@company.com" - - tests := []struct { - bundlePath string - ctx context.Context - name string - initialValue *bool - expectedValue *bool - expectedWarning string - }{ - { - name: "preset enabled, bundle in Workspace, databricks runtime", - bundlePath: workspacePath, - ctx: dbr.MockRuntime(testContext, true), - initialValue: &enabled, - expectedValue: &enabled, - }, - { - name: "preset enabled, bundle not in Workspace, databricks runtime", - bundlePath: "/Users/user.name@company.com", - ctx: dbr.MockRuntime(testContext, true), - initialValue: &enabled, - expectedValue: &disabled, - expectedWarning: "source-linked deployment is available only in the Databricks Workspace", - }, - { - name: "preset enabled, bundle in Workspace, not databricks runtime", - bundlePath: workspacePath, - ctx: dbr.MockRuntime(testContext, false), - initialValue: &enabled, - expectedValue: &disabled, - expectedWarning: "source-linked deployment is available only in the Databricks Workspace", - }, - 
{ - name: "preset disabled, bundle in Workspace, databricks runtime", - bundlePath: workspacePath, - ctx: dbr.MockRuntime(testContext, true), - initialValue: &disabled, - expectedValue: &disabled, - }, - { - name: "preset nil, bundle in Workspace, databricks runtime", - bundlePath: workspacePath, - ctx: dbr.MockRuntime(testContext, true), - initialValue: nil, - expectedValue: nil, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - b := &bundle.Bundle{ - SyncRootPath: tt.bundlePath, - Config: config.Root{ - Presets: config.Presets{ - SourceLinkedDeployment: tt.initialValue, - }, - }, - } - - bundletest.SetLocation(b, "presets.source_linked_deployment", []dyn.Location{{File: "databricks.yml"}}) - diags := bundle.Apply(tt.ctx, b, mutator.ApplyPresets()) - if diags.HasError() { - t.Fatalf("unexpected error: %v", diags) - } - - if tt.expectedWarning != "" { - require.Equal(t, tt.expectedWarning, diags[0].Summary) - require.NotEmpty(t, diags[0].Locations) - } - - require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment) - }) - } -} diff --git a/bundle/config/mutator/apply_source_linked_deployment_preset.go b/bundle/config/mutator/apply_source_linked_deployment_preset.go new file mode 100644 index 000000000..570ca72cf --- /dev/null +++ b/bundle/config/mutator/apply_source_linked_deployment_preset.go @@ -0,0 +1,92 @@ +package mutator + +import ( + "context" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type applySourceLinkedDeploymentPreset struct{} + +// Apply source-linked deployment preset +func ApplySourceLinkedDeploymentPreset() *applySourceLinkedDeploymentPreset { + return &applySourceLinkedDeploymentPreset{} +} + +func (m *applySourceLinkedDeploymentPreset) Name() string { + return "ApplySourceLinkedDeploymentPreset" +} + +func (m *applySourceLinkedDeploymentPreset) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if config.IsExplicitlyDisabled(b.Config.Presets.SourceLinkedDeployment) { + return nil + } + + var diags diag.Diagnostics + isDatabricksWorkspace := dbr.RunsOnRuntime(ctx) && strings.HasPrefix(b.SyncRootPath, "/Workspace/") + target := b.Config.Bundle.Target + + if config.IsExplicitlyEnabled((b.Config.Presets.SourceLinkedDeployment)) { + if !isDatabricksWorkspace { + path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("presets"), dyn.Key("source_linked_deployment")) + diags = diags.Append( + diag.Diagnostic{ + Severity: diag.Warning, + Summary: "source-linked deployment is available only in the Databricks Workspace", + Paths: []dyn.Path{ + path, + }, + Locations: b.Config.GetLocations(path[2:].String()), + }, + ) + + disabled := false + b.Config.Presets.SourceLinkedDeployment = &disabled + return diags + } + } + + if isDatabricksWorkspace && b.Config.Bundle.Mode == config.Development { + enabled := true + b.Config.Presets.SourceLinkedDeployment = &enabled + } + + if len(b.Config.Resources.Apps) > 0 && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("presets"), dyn.Key("source_linked_deployment")) + diags = diags.Append( + diag.Diagnostic{ + Severity: diag.Error, + Summary: "source-linked deployment is not supported for apps", + Paths: []dyn.Path{ + path, + }, + Locations: b.Config.GetLocations(path[2:].String()), + }, + ) + + return diags + } + + 
// This mutator runs before workspace paths are defaulted so it's safe to check for the user-defined value + if b.Config.Workspace.FilePath != "" && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + path := dyn.NewPath(dyn.Key("workspace"), dyn.Key("file_path")) + diags = diags.Append( + diag.Diagnostic{ + Severity: diag.Warning, + Summary: "workspace.file_path setting will be ignored in source-linked deployment mode", + Detail: "In source-linked deployment files are not copied to the destination and resources use source files instead", + Paths: []dyn.Path{ + path, + }, + Locations: b.Config.GetLocations(path.String()), + }, + ) + } + + return diags +} diff --git a/bundle/config/mutator/apply_source_linked_deployment_preset_test.go b/bundle/config/mutator/apply_source_linked_deployment_preset_test.go new file mode 100644 index 000000000..42fda8ea7 --- /dev/null +++ b/bundle/config/mutator/apply_source_linked_deployment_preset_test.go @@ -0,0 +1,141 @@ +package mutator_test + +import ( + "context" + "runtime" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/require" +) + +func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") + } + + testContext := context.Background() + enabled := true + disabled := false + workspacePath := "/Workspace/user.name@company.com" + + tests := []struct { + name string + ctx context.Context + mutateBundle func(b *bundle.Bundle) + initialValue *bool + expectedValue *bool + expectedWarning string + expectedError string + }{ + { + name: "preset enabled, bundle in Workspace, databricks runtime", + ctx: dbr.MockRuntime(testContext, true), + initialValue: &enabled, + expectedValue: &enabled, + }, + { + name: "preset enabled, bundle not in Workspace, databricks runtime", + ctx: dbr.MockRuntime(testContext, true), + mutateBundle: func(b *bundle.Bundle) { + b.SyncRootPath = "/Users/user.name@company.com" + }, + initialValue: &enabled, + expectedValue: &disabled, + expectedWarning: "source-linked deployment is available only in the Databricks Workspace", + }, + { + name: "preset enabled, bundle in Workspace, not databricks runtime", + ctx: dbr.MockRuntime(testContext, false), + initialValue: &enabled, + expectedValue: &disabled, + expectedWarning: "source-linked deployment is available only in the Databricks Workspace", + }, + { + name: "preset disabled, bundle in Workspace, databricks runtime", + ctx: dbr.MockRuntime(testContext, true), + initialValue: &disabled, + expectedValue: &disabled, + }, + { + name: "preset nil, bundle in Workspace, databricks runtime", + ctx: dbr.MockRuntime(testContext, true), + initialValue: nil, + expectedValue: nil, + }, + { + name: "preset nil, dev mode true, bundle in Workspace, databricks runtime", + ctx: dbr.MockRuntime(testContext, true), + mutateBundle: func(b *bundle.Bundle) { + b.Config.Bundle.Mode = config.Development + }, + initialValue: nil, + expectedValue: &enabled, + }, + { + name: "preset enabled, workspace.file_path is defined by user", + ctx: dbr.MockRuntime(testContext, true), + mutateBundle: func(b *bundle.Bundle) { + 
b.Config.Workspace.FilePath = "file_path" + }, + initialValue: &enabled, + expectedValue: &enabled, + expectedWarning: "workspace.file_path setting will be ignored in source-linked deployment mode", + }, + { + name: "preset enabled, apps is defined by user", + ctx: dbr.MockRuntime(testContext, true), + mutateBundle: func(b *bundle.Bundle) { + b.Config.Resources.Apps = map[string]*resources.App{ + "app": {}, + } + }, + initialValue: &enabled, + expectedValue: &enabled, + expectedError: "source-linked deployment is not supported for apps", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &bundle.Bundle{ + SyncRootPath: workspacePath, + Config: config.Root{ + Presets: config.Presets{ + SourceLinkedDeployment: tt.initialValue, + }, + }, + } + + if tt.mutateBundle != nil { + tt.mutateBundle(b) + } + + bundletest.SetLocation(b, "presets.source_linked_deployment", []dyn.Location{{File: "databricks.yml"}}) + bundletest.SetLocation(b, "workspace.file_path", []dyn.Location{{File: "databricks.yml"}}) + + diags := bundle.Apply(tt.ctx, b, mutator.ApplySourceLinkedDeploymentPreset()) + if diags.HasError() && tt.expectedError == "" { + t.Fatalf("unexpected error: %v", diags) + } + + if tt.expectedWarning != "" { + require.Equal(t, tt.expectedWarning, diags[0].Summary) + require.NotEmpty(t, diags[0].Locations) + } + + if tt.expectedError != "" { + require.Equal(t, tt.expectedError, diags[0].Summary) + require.NotEmpty(t, diags[0].Locations) + } + + require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment) + }) + } +} diff --git a/bundle/config/mutator/capture_schema_dependency.go b/bundle/config/mutator/capture_schema_dependency.go new file mode 100644 index 000000000..5025c9a0d --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency.go @@ -0,0 +1,100 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" +) + +type captureSchemaDependency struct{} + +// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines +// or UC Volumes using the `${resources.schemas..name}` syntax. Using this +// syntax allows TF to capture the deploy time dependency this DLT pipeline or UC Volume +// has on the schema and deploy changes to the schema before deploying the pipeline or volume. +// +// This mutator translates any implicit schema references in DLT pipelines or UC Volumes +// to the explicit syntax. 
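To make the rewrite described in the comment above concrete, here is a minimal test-style sketch (not part of this change): a volume whose catalog and schema name both match a schema defined in the same bundle has its literal schema name replaced with an interpolation reference. The schema key "landing", catalog "main", and the test name are illustrative assumptions.

package mutator_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/config/mutator"
	"github.com/databricks/cli/bundle/config/resources"
	"github.com/databricks/databricks-sdk-go/service/catalog"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Hypothetical example: a volume that refers to a schema defined in the same bundle.
func TestCaptureSchemaDependencyVolumeExample(t *testing.T) {
	b := &bundle.Bundle{
		Config: config.Root{
			Resources: config.Resources{
				Schemas: map[string]*resources.Schema{
					"landing": {CreateSchema: &catalog.CreateSchema{CatalogName: "main", Name: "landing"}},
				},
				Volumes: map[string]*resources.Volume{
					"raw": {CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
						CatalogName: "main",
						SchemaName:  "landing", // literal name, matches the schema above
					}},
				},
			},
		},
	}

	d := bundle.Apply(context.Background(), b, mutator.CaptureSchemaDependency())
	require.Nil(t, d)

	// The literal schema name becomes an interpolation reference, so the schema is
	// deployed before the volume that depends on it.
	assert.Equal(t, "${resources.schemas.landing.name}", b.Config.Resources.Volumes["raw"].CreateVolumeRequestContent.SchemaName)
}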
+func CaptureSchemaDependency() bundle.Mutator { + return &captureSchemaDependency{} +} + +func (m *captureSchemaDependency) Name() string { + return "CaptureSchemaDependency" +} + +func schemaNameRef(key string) string { + return fmt.Sprintf("${resources.schemas.%s.name}", key) +} + +func findSchema(b *bundle.Bundle, catalogName, schemaName string) (string, *resources.Schema) { + if catalogName == "" || schemaName == "" { + return "", nil + } + + for k, s := range b.Config.Resources.Schemas { + if s != nil && s.CreateSchema != nil && s.CatalogName == catalogName && s.Name == schemaName { + return k, s + } + } + return "", nil +} + +func resolveVolume(v *resources.Volume, b *bundle.Bundle) { + if v == nil || v.CreateVolumeRequestContent == nil { + return + } + schemaK, schema := findSchema(b, v.CatalogName, v.SchemaName) + if schema == nil { + return + } + + v.SchemaName = schemaNameRef(schemaK) +} + +func resolvePipelineSchema(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Schema == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Schema) + if schema == nil { + return + } + + p.Schema = schemaNameRef(schemaK) +} + +func resolvePipelineTarget(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Target == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Target) + if schema == nil { + return + } + p.Target = schemaNameRef(schemaK) +} + +func (m *captureSchemaDependency) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, p := range b.Config.Resources.Pipelines { + // "schema" and "target" have the same semantics in the DLT API but are mutually + // exclusive i.e. only one can be set at a time. If schema is set, the pipeline + // is in direct publishing mode and can write tables to multiple schemas + // (vs target which is limited to a single schema). 
+ resolvePipelineTarget(p, b) + resolvePipelineSchema(p, b) + } + for _, v := range b.Config.Resources.Volumes { + resolveVolume(v, b) + } + return nil +} diff --git a/bundle/config/mutator/capture_schema_dependency_test.go b/bundle/config/mutator/capture_schema_dependency_test.go new file mode 100644 index 000000000..0a94e7748 --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency_test.go @@ -0,0 +1,277 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCaptureSchemaDependencyForVolume(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Volumes: map[string]*resources.Volume{ + "volume1": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "foobar", + }, + }, + "volume2": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog2", + SchemaName: "foobar", + }, + }, + "volume3": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "barfoo", + }, + }, + "volume4": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogX", + SchemaName: "foobar", + }, + }, + "volume5": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "schemaX", + }, + }, + "nilVolume": nil, + "emptyVolume": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Volumes["volume1"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Volumes["volume2"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Volumes["volume3"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "foobar", b.Config.Resources.Volumes["volume4"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "schemaX", b.Config.Resources.Volumes["volume5"].CreateVolumeRequestContent.SchemaName) + + assert.Nil(t, b.Config.Resources.Volumes["nilVolume"]) + assert.Nil(t, b.Config.Resources.Volumes["emptyVolume"].CreateVolumeRequestContent) +} + +func TestCaptureSchemaDependencyForPipelinesWithTarget(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + 
}, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Schema: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Schema: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "", + Name: "whatever", + }, + }, + "nilPipeline": nil, + "emptyPipeline": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Schema) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Schema) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Schema) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Schema) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Schema) + + assert.Nil(t, b.Config.Resources.Pipelines["nilPipeline"]) + assert.Nil(t, b.Config.Resources.Pipelines["emptyPipeline"].PipelineSpec) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Target) + } +} + +func TestCaptureSchemaDependencyForPipelinesWithSchema(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Target: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Target: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "", + Name: "whatever", + }, + }, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + assert.Equal(t, "${resources.schemas.schema1.name}", 
b.Config.Resources.Pipelines["pipeline1"].Target) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Target) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Target) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Target) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Target) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Schema) + } +} diff --git a/bundle/config/mutator/configure_dashboard_defaults_test.go b/bundle/config/mutator/configure_dashboard_defaults_test.go index 2234f9a73..9794d355c 100644 --- a/bundle/config/mutator/configure_dashboard_defaults_test.go +++ b/bundle/config/mutator/configure_dashboard_defaults_test.go @@ -109,19 +109,19 @@ func TestConfigureDashboardDefaultsEmbedCredentials(t *testing.T) { // Set to true; still true. v, err = dyn.Get(b.Config.Value(), "resources.dashboards.d1.embed_credentials") if assert.NoError(t, err) { - assert.Equal(t, true, v.MustBool()) + assert.True(t, v.MustBool()) } // Set to false; still false. v, err = dyn.Get(b.Config.Value(), "resources.dashboards.d2.embed_credentials") if assert.NoError(t, err) { - assert.Equal(t, false, v.MustBool()) + assert.False(t, v.MustBool()) } // Not set; now false. v, err = dyn.Get(b.Config.Value(), "resources.dashboards.d3.embed_credentials") if assert.NoError(t, err) { - assert.Equal(t, false, v.MustBool()) + assert.False(t, v.MustBool()) } // No valid dashboard; no change. 
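The pipeline tests above exercise many combinations; the following minimal sketch isolates the matching rule: a pipeline's target (or schema) is rewritten only when both its catalog and its schema name match a schema defined in the bundle, otherwise the literal value is left alone. The keys "events", "etl", "other" and the catalog names are illustrative assumptions, not part of this change.

package mutator_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/config/mutator"
	"github.com/databricks/cli/bundle/config/resources"
	"github.com/databricks/databricks-sdk-go/service/catalog"
	"github.com/databricks/databricks-sdk-go/service/pipelines"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Hypothetical example: only the pipeline whose catalog AND target both match an
// in-bundle schema is rewritten; the other pipeline keeps its literal target.
func TestCaptureSchemaDependencyPipelineExample(t *testing.T) {
	b := &bundle.Bundle{
		Config: config.Root{
			Resources: config.Resources{
				Schemas: map[string]*resources.Schema{
					"events": {CreateSchema: &catalog.CreateSchema{CatalogName: "main", Name: "events"}},
				},
				Pipelines: map[string]*resources.Pipeline{
					"etl":   {PipelineSpec: &pipelines.PipelineSpec{Catalog: "main", Target: "events"}},
					"other": {PipelineSpec: &pipelines.PipelineSpec{Catalog: "staging", Target: "events"}},
				},
			},
		},
	}

	d := bundle.Apply(context.Background(), b, mutator.CaptureSchemaDependency())
	require.Nil(t, d)

	assert.Equal(t, "${resources.schemas.events.name}", b.Config.Resources.Pipelines["etl"].Target)
	assert.Equal(t, "events", b.Config.Resources.Pipelines["other"].Target)
}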
diff --git a/bundle/config/mutator/default_queueing_test.go b/bundle/config/mutator/default_queueing_test.go index d3621663b..4c521812e 100644 --- a/bundle/config/mutator/default_queueing_test.go +++ b/bundle/config/mutator/default_queueing_test.go @@ -28,8 +28,8 @@ func TestDefaultQueueingApplyNoJobs(t *testing.T) { }, } d := bundle.Apply(context.Background(), b, DefaultQueueing()) - assert.Len(t, d, 0) - assert.Len(t, b.Config.Resources.Jobs, 0) + assert.Empty(t, d) + assert.Empty(t, b.Config.Resources.Jobs) } func TestDefaultQueueingApplyJobsAlreadyEnabled(t *testing.T) { @@ -47,7 +47,7 @@ func TestDefaultQueueingApplyJobsAlreadyEnabled(t *testing.T) { }, } d := bundle.Apply(context.Background(), b, DefaultQueueing()) - assert.Len(t, d, 0) + assert.Empty(t, d) assert.True(t, b.Config.Resources.Jobs["job"].Queue.Enabled) } @@ -66,7 +66,7 @@ func TestDefaultQueueingApplyEnableQueueing(t *testing.T) { }, } d := bundle.Apply(context.Background(), b, DefaultQueueing()) - assert.Len(t, d, 0) + assert.Empty(t, d) assert.NotNil(t, b.Config.Resources.Jobs["job"].Queue) assert.True(t, b.Config.Resources.Jobs["job"].Queue.Enabled) } @@ -96,7 +96,7 @@ func TestDefaultQueueingApplyWithMultipleJobs(t *testing.T) { }, } d := bundle.Apply(context.Background(), b, DefaultQueueing()) - assert.Len(t, d, 0) + assert.Empty(t, d) assert.False(t, b.Config.Resources.Jobs["job1"].Queue.Enabled) assert.True(t, b.Config.Resources.Jobs["job2"].Queue.Enabled) assert.True(t, b.Config.Resources.Jobs["job3"].Queue.Enabled) diff --git a/bundle/config/mutator/environments_compat_test.go b/bundle/config/mutator/environments_compat_test.go index 8a2129847..11facf9fb 100644 --- a/bundle/config/mutator/environments_compat_test.go +++ b/bundle/config/mutator/environments_compat_test.go @@ -44,7 +44,7 @@ func TestEnvironmentsToTargetsWithEnvironmentsDefined(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.EnvironmentsToTargets()) require.NoError(t, diags.Error()) - assert.Len(t, b.Config.Environments, 0) + assert.Empty(t, b.Config.Environments) assert.Len(t, b.Config.Targets, 1) } @@ -61,6 +61,6 @@ func TestEnvironmentsToTargetsWithTargetsDefined(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.EnvironmentsToTargets()) require.NoError(t, diags.Error()) - assert.Len(t, b.Config.Environments, 0) + assert.Empty(t, b.Config.Environments) assert.Len(t, b.Config.Targets, 1) } diff --git a/bundle/config/mutator/expand_workspace_root.go b/bundle/config/mutator/expand_workspace_root.go index a29d129b0..2ec70548f 100644 --- a/bundle/config/mutator/expand_workspace_root.go +++ b/bundle/config/mutator/expand_workspace_root.go @@ -2,7 +2,6 @@ package mutator import ( "context" - "fmt" "path" "strings" @@ -33,7 +32,7 @@ func (m *expandWorkspaceRoot) Apply(ctx context.Context, b *bundle.Bundle) diag. 
} if strings.HasPrefix(root, "~/") { - home := fmt.Sprintf("/Workspace/Users/%s", currentUser.UserName) + home := "/Workspace/Users/" + currentUser.UserName b.Config.Workspace.RootPath = path.Join(home, root[2:]) } diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 5c263ac03..3661c6bcd 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -32,7 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn } if info.WorktreeRoot == "" { - b.WorktreeRoot = b.BundleRoot + b.WorktreeRoot = b.SyncRoot } else { b.WorktreeRoot = vfs.MustNew(info.WorktreeRoot) } diff --git a/bundle/config/mutator/merge_apps.go b/bundle/config/mutator/merge_apps.go new file mode 100644 index 000000000..d91e8dd7f --- /dev/null +++ b/bundle/config/mutator/merge_apps.go @@ -0,0 +1,45 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/merge" +) + +type mergeApps struct{} + +func MergeApps() bundle.Mutator { + return &mergeApps{} +} + +func (m *mergeApps) Name() string { + return "MergeApps" +} + +func (m *mergeApps) resourceName(v dyn.Value) string { + switch v.Kind() { + case dyn.KindInvalid, dyn.KindNil: + return "" + case dyn.KindString: + return v.MustString() + default: + panic("app name must be a string") + } +} + +func (m *mergeApps) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + if v.Kind() == dyn.KindNil { + return v, nil + } + + return dyn.Map(v, "resources.apps", dyn.Foreach(func(_ dyn.Path, app dyn.Value) (dyn.Value, error) { + return dyn.Map(app, "resources", merge.ElementsByKeyWithOverride("name", m.resourceName)) + })) + }) + + return diag.FromErr(err) +} diff --git a/bundle/config/mutator/merge_apps_test.go b/bundle/config/mutator/merge_apps_test.go new file mode 100644 index 000000000..0a161b845 --- /dev/null +++ b/bundle/config/mutator/merge_apps_test.go @@ -0,0 +1,73 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" +) + +func TestMergeApps(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "foo": { + App: &apps.App{ + Name: "foo", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + Permission: "CAN_USE", + }, + }, + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE", + }, + }, + { + Name: "sql1", + Job: &apps.AppResourceJob{ + Id: "9876", + Permission: "CAN_MANAGE", + }, + }, + }, + }, + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.MergeApps()) + assert.NoError(t, diags.Error()) + + j := b.Config.Resources.Apps["foo"] + + assert.Len(t, j.Resources, 2) + assert.Equal(t, "job1", j.Resources[0].Name) + assert.Equal(t, "sql1", j.Resources[1].Name) + + assert.Equal(t, "CAN_MANAGE", string(j.Resources[0].Job.Permission)) + + assert.Nil(t, 
j.Resources[1].SqlWarehouse) + assert.Equal(t, "CAN_MANAGE", string(j.Resources[1].Job.Permission)) +} diff --git a/bundle/config/mutator/merge_job_tasks_test.go b/bundle/config/mutator/merge_job_tasks_test.go index a9dae1e10..e6675eefb 100644 --- a/bundle/config/mutator/merge_job_tasks_test.go +++ b/bundle/config/mutator/merge_job_tasks_test.go @@ -74,8 +74,8 @@ func TestMergeJobTasks(t *testing.T) { assert.Equal(t, "i3.2xlarge", cluster.NodeTypeId) assert.Equal(t, 4, cluster.NumWorkers) assert.Len(t, task0.Libraries, 2) - assert.Equal(t, task0.Libraries[0].Whl, "package1") - assert.Equal(t, task0.Libraries[1].Pypi.Package, "package2") + assert.Equal(t, "package1", task0.Libraries[0].Whl) + assert.Equal(t, "package2", task0.Libraries[1].Pypi.Package) // This task was left untouched. task1 := j.Tasks[1].NewCluster diff --git a/bundle/config/mutator/prepend_workspace_prefix.go b/bundle/config/mutator/prepend_workspace_prefix.go index b093ec26a..616759ee4 100644 --- a/bundle/config/mutator/prepend_workspace_prefix.go +++ b/bundle/config/mutator/prepend_workspace_prefix.go @@ -55,7 +55,7 @@ func (m *prependWorkspacePrefix) Apply(ctx context.Context, b *bundle.Bundle) di } } - return dyn.NewValue(fmt.Sprintf("/Workspace%s", path), v.Locations()), nil + return dyn.NewValue("/Workspace"+path, v.Locations()), nil }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index df0136fad..0fe6bd54f 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -2,11 +2,11 @@ package mutator import ( "context" + "fmt" "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/iamutil" @@ -58,14 +58,6 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) { t.TriggerPauseStatus = config.Paused } - if !config.IsExplicitlyDisabled(t.SourceLinkedDeployment) { - isInWorkspace := strings.HasPrefix(b.SyncRootPath, "/Workspace/") - if isInWorkspace && dbr.RunsOnRuntime(ctx) { - enabled := true - t.SourceLinkedDeployment = &enabled - } - } - if !config.IsExplicitlyDisabled(t.PipelinesDevelopment) { enabled := true t.PipelinesDevelopment = &enabled @@ -155,8 +147,21 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs } } - if !isPrincipalUsed && !isRunAsSet(r) { - return diag.Errorf("'run_as' must be set for all jobs when using 'mode: production'") + // We need to verify that there is only a single deployment of the current target. + // The best way to enforce this is to explicitly set root_path. + advice := fmt.Sprintf( + "set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", + b.Config.Workspace.CurrentUser.UserName, + ) + if !isExplicitRootSet(b) { + if isRunAsSet(r) || isPrincipalUsed { + // Just setting run_as is not enough to guarantee a single deployment, + // and neither is setting a principal. + // We only show a warning for these cases since we didn't historically + // report an error for them. 
+ return diag.Recommendationf("target with 'mode: production' should %s", advice) + } + return diag.Errorf("target with 'mode: production' must %s", advice) } return nil } @@ -173,6 +178,10 @@ func isRunAsSet(r config.Resources) bool { return true } +func isExplicitRootSet(b *bundle.Bundle) bool { + return b.Target != nil && b.Target.Workspace != nil && b.Target.Workspace.RootPath != "" +} + func (m *processTargetMode) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { switch b.Config.Bundle.Mode { case config.Development: diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 14d524416..723b01ee3 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -3,18 +3,17 @@ package mutator import ( "context" "reflect" - "runtime" "slices" "testing" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/tags" "github.com/databricks/cli/libs/vfs" sdkconfig "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/service/apps" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/dashboards" @@ -144,6 +143,13 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, }, + Apps: map[string]*resources.App{ + "app1": { + App: &apps.App{ + Name: "app1", + }, + }, + }, }, }, SyncRoot: vfs.MustNew("/Users/lennart.kats@databricks.com"), @@ -163,18 +169,18 @@ func TestProcessTargetModeDevelopment(t *testing.T) { // Job 1 assert.Equal(t, "[dev lennart] job1", b.Config.Resources.Jobs["job1"].Name) - assert.Equal(t, b.Config.Resources.Jobs["job1"].Tags["existing"], "tag") - assert.Equal(t, b.Config.Resources.Jobs["job1"].Tags["dev"], "lennart") - assert.Equal(t, b.Config.Resources.Jobs["job1"].Schedule.PauseStatus, jobs.PauseStatusPaused) + assert.Equal(t, "tag", b.Config.Resources.Jobs["job1"].Tags["existing"]) + assert.Equal(t, "lennart", b.Config.Resources.Jobs["job1"].Tags["dev"]) + assert.Equal(t, jobs.PauseStatusPaused, b.Config.Resources.Jobs["job1"].Schedule.PauseStatus) // Job 2 assert.Equal(t, "[dev lennart] job2", b.Config.Resources.Jobs["job2"].Name) - assert.Equal(t, b.Config.Resources.Jobs["job2"].Tags["dev"], "lennart") - assert.Equal(t, b.Config.Resources.Jobs["job2"].Schedule.PauseStatus, jobs.PauseStatusUnpaused) + assert.Equal(t, "lennart", b.Config.Resources.Jobs["job2"].Tags["dev"]) + assert.Equal(t, jobs.PauseStatusUnpaused, b.Config.Resources.Jobs["job2"].Schedule.PauseStatus) // Pipeline 1 assert.Equal(t, "[dev lennart] pipeline1", b.Config.Resources.Pipelines["pipeline1"].Name) - assert.Equal(t, false, b.Config.Resources.Pipelines["pipeline1"].Continuous) + assert.False(t, b.Config.Resources.Pipelines["pipeline1"].Continuous) assert.True(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) // Experiment 1 @@ -323,7 +329,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b := mockBundle(config.Production) diags := validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "run_as") + require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. 
A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") b.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state" b.Config.Workspace.ArtifactPath = "/Shared/.bundle/x/y/artifacts" @@ -331,7 +337,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Workspace.ResourcePath = "/Shared/.bundle/x/y/resources" diags = validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "production") + require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") permissions := []resources.Permission{ { @@ -377,12 +383,29 @@ func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { require.NoError(t, diags.Error()) } +func TestProcessTargetModeProductionOkWithRootPath(t *testing.T) { + b := mockBundle(config.Production) + + // Our target has all kinds of problems when not using service principals ... + diags := validateProductionMode(context.Background(), b, false) + require.Error(t, diags.Error()) + + // ... but we're okay if we specify a root path + b.Target = &config.Target{ + Workspace: &config.Workspace{ + RootPath: "some-root-path", + }, + } + diags = validateProductionMode(context.Background(), b, false) + require.NoError(t, diags.Error()) +} + // Make sure that we have test coverage for all resource types func TestAllResourcesMocked(t *testing.T) { b := mockBundle(config.Development) resources := reflect.ValueOf(b.Config.Resources) - for i := 0; i < resources.NumField(); i++ { + for i := range resources.NumField() { field := resources.Field(i) if field.Kind() == reflect.Map { assert.True( @@ -411,13 +434,20 @@ func TestAllNonUcResourcesAreRenamed(t *testing.T) { require.NoError(t, diags.Error()) resources := reflect.ValueOf(b.Config.Resources) - for i := 0; i < resources.NumField(); i++ { + for i := range resources.NumField() { field := resources.Field(i) if field.Kind() == reflect.Map { for _, key := range field.MapKeys() { resource := field.MapIndex(key) nameField := resource.Elem().FieldByName("Name") + resourceType := resources.Type().Field(i).Name + + // Skip apps, as they are not renamed + if resourceType == "Apps" { + continue + } + if !nameField.IsValid() || nameField.Kind() != reflect.String { continue } @@ -540,32 +570,3 @@ func TestPipelinesDevelopmentDisabled(t *testing.T) { assert.False(t, b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) } - -func TestSourceLinkedDeploymentEnabled(t *testing.T) { - b, diags := processSourceLinkedBundle(t, true) - require.NoError(t, diags.Error()) - assert.True(t, *b.Config.Presets.SourceLinkedDeployment) -} - -func TestSourceLinkedDeploymentDisabled(t *testing.T) { - b, diags := processSourceLinkedBundle(t, false) - require.NoError(t, diags.Error()) - assert.False(t, *b.Config.Presets.SourceLinkedDeployment) -} - -func processSourceLinkedBundle(t *testing.T, presetEnabled bool) (*bundle.Bundle, diag.Diagnostics) { - if runtime.GOOS == "windows" { - t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace") - } - - b := mockBundle(config.Development) - - workspacePath := "/Workspace/lennart@company.com/" - b.SyncRootPath = workspacePath - 
b.Config.Presets.SourceLinkedDeployment = &presetEnabled - - ctx := dbr.MockRuntime(context.Background(), true) - m := bundle.Seq(ProcessTargetMode(), ApplyPresets()) - diags := bundle.Apply(ctx, b, m) - return b, diags -} diff --git a/bundle/config/mutator/python/python_diagnostics.go b/bundle/config/mutator/python/python_diagnostics.go index 12822065b..7a1e13b4e 100644 --- a/bundle/config/mutator/python/python_diagnostics.go +++ b/bundle/config/mutator/python/python_diagnostics.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) +// pythonDiagnostic is a single entry in diagnostics.json type pythonDiagnostic struct { Severity pythonSeverity `json:"severity"` Summary string `json:"summary"` diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go new file mode 100644 index 000000000..2fa86bea0 --- /dev/null +++ b/bundle/config/mutator/python/python_locations.go @@ -0,0 +1,194 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + "path/filepath" + + "github.com/databricks/cli/libs/dyn" +) + +// generatedFileName is used as the virtual file name for YAML generated by Python code. +// +// mergePythonLocations replaces dyn.Location with generatedFileName with locations loaded +// from locations.json +const generatedFileName = "__generated_by_python__.yml" + +// pythonLocations is data structure for efficient location lookup for a given path +// +// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path. +// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n) +// where n is the number of components in the path. +// +// For example, with locations.json: +// +// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5} +// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5} +// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7} +// +// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 +// +// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, +// because we use the location of the job as the most precise approximation. +// +// See pythonLocationEntry for the structure of a single entry in locations.json +type pythonLocations struct { + // descendants referenced by index, e.g. '.foo' + keys map[string]*pythonLocations + + // descendants referenced by key, e.g. '[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +// pythonLocationEntry is a single entry in locations.json +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from Python mutator into given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. 
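A minimal sketch of the merge described above (not part of this change), reusing the in-package helpers defined later in this file: a value whose only location points at the virtual generated YAML file is re-attributed to the Python source location recorded in locations.json. The file names, line numbers, and test name are illustrative assumptions.

package python

import (
	"testing"

	"github.com/databricks/cli/libs/dyn"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Hypothetical example of re-attributing a virtual location to Python source.
func TestMergePythonLocationsExample(t *testing.T) {
	pythonLocation := dyn.Location{File: "resources/job_0.py", Line: 3, Column: 5}
	virtualLocation := dyn.Location{File: generatedFileName, Line: 7, Column: 1}

	locations := newPythonLocations()
	putPythonLocation(locations, dyn.MustPathFromString("resources"), pythonLocation)

	input := dyn.NewValue(
		map[string]dyn.Value{
			"resources": dyn.NewValue("placeholder", []dyn.Location{virtualLocation}),
		},
		[]dyn.Location{virtualLocation},
	)

	output, err := mergePythonLocations(input, locations)
	require.NoError(t, err)

	resources, err := dyn.Get(output, "resources")
	require.NoError(t, err)

	// The virtual location is dropped and the Python location becomes the primary one.
	assert.Equal(t, []dyn.Location{pythonLocation}, resources.Locations())
}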
+func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + newLocation, ok := findPythonLocation(locations, path) + if !ok { + return value, nil + } + + // The first item in the list is the "last" location used for error reporting + // + // Loaded YAML uses virtual file path as location, we remove any of such references, + // because they should use 'newLocation' instead. + // + // We preserve any previous non-virtual locations in case when Python function modified + // resource defined in YAML. + newLocations := append( + []dyn.Location{newLocation}, + removeVirtualLocations(value.Locations())..., + ) + + return value.WithLocations(newLocations), nil + }) +} + +func removeVirtualLocations(locations []dyn.Location) []dyn.Location { + var newLocations []dyn.Location + + for _, location := range locations { + if filepath.Base(location.File) == generatedFileName { + continue + } + + newLocations = append(newLocations, location) + } + + return newLocations +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// locations file is newline-separated JSON objects with pythonLocationEntry structure. +func parsePythonLocations(input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + currentNode := trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. 
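The closest-ancestor behavior described above can be shown with a short sketch (not part of this change), reusing the in-package helpers defined earlier in this file; the paths and locations are illustrative assumptions.

package python

import (
	"testing"

	"github.com/databricks/cli/libs/dyn"
	"github.com/stretchr/testify/assert"
)

// Hypothetical example: a lookup for a path with no exact entry falls back to the
// location stored for its nearest ancestor; a path outside the trie reports no match.
func TestFindPythonLocationAncestorExample(t *testing.T) {
	jobLocation := dyn.Location{File: "resources/job_0.py", Line: 3, Column: 5}

	locations := newPythonLocations()
	putPythonLocation(locations, dyn.MustPathFromString("resources.jobs.job_0"), jobLocation)

	// No entry exists for the full path, so the job's location is used as the best approximation.
	actual, ok := findPythonLocation(locations, dyn.MustPathFromString("resources.jobs.job_0.tasks[0].email_notifications"))
	assert.True(t, ok)
	assert.Equal(t, jobLocation, actual)

	// A path with no matching ancestor location reports no match.
	_, ok = findPythonLocation(locations, dyn.MustPathFromString("resources.pipelines"))
	assert.False(t, ok)
}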
+func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + currentNode := locations + lastLocation := locations.location + exists := locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 000000000..32afcc92b --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,179 @@ +package python + +import ( + "bytes" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/require" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + 
putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestLoadOutput(t *testing.T) { + location := dyn.Location{File: "my_job.py", Line: 1, Column: 1} + bundleRoot := t.TempDir() + output := `{ + "resources": { + "jobs": { + "my_job": { + "name": "my_job", + "tasks": [ + { + "task_key": "my_task", + "notebook_task": { + "notebook_path": "my_notebook" + } + } + ] + } + } + } + }` + + locations := newPythonLocations() + putPythonLocation( + locations, + dyn.MustPathFromString("resources.jobs.my_job"), + location, + ) + + value, diags := loadOutput( + bundleRoot, + bytes.NewReader([]byte(output)), + locations, + ) + + assert.Equal(t, diag.Diagnostics{}, diags) + + name, err := dyn.Get(value, "resources.jobs.my_job.name") + require.NoError(t, err) + require.Equal(t, []dyn.Location{location}, name.Locations()) + + // until we implement path normalization, we have to keep locations of values + // that change semantic depending on their location + // + // note: it's important to have absolute path including 'bundleRoot' + // because mutator pipeline already has expanded locations into absolute path + notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path") + require.NoError(t, err) + require.Len(t, notebookPath.Locations(), 1) + require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File) +} + +func TestParsePythonLocations(t *testing.T) { + expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 69c1a5dd6..cd2e286e5 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,10 +7,14 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" + "reflect" "strings" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -40,6 +44,8 @@ const ( // We also open for possibility of 
appending other sections of bundle configuration, // for example, adding new variables. However, this is not supported yet, and CLI rejects // such changes. + // + // Deprecated, left for backward-compatibility with PyDABs. PythonMutatorPhaseLoad phase = "load" // PythonMutatorPhaseInit is the phase after bundle configuration was loaded, and @@ -59,7 +65,46 @@ const ( // PyDABs can output YAML containing references to variables, and CLI should resolve them. // // Existing resources can't be removed, and CLI rejects such changes. + // + // Deprecated, left for backward-compatibility with PyDABs. PythonMutatorPhaseInit phase = "init" + + // PythonMutatorPhaseLoadResources is the phase in which YAML configuration was loaded. + // + // At this stage, we execute Python code to load resources defined in Python. + // + // During this process, Python code can access: + // - selected deployment target + // - bundle variable values + // - variables provided through CLI argument or environment variables + // + // The following is not available: + // - variables referencing other variables are in unresolved format + // + // Python code can output YAML referencing variables, and CLI should resolve them. + // + // Existing resources can't be removed or modified, and CLI rejects such changes. + // While it's called 'load_resources', this phase is executed in 'init' phase of mutator pipeline. + PythonMutatorPhaseLoadResources phase = "load_resources" + + // PythonMutatorPhaseApplyMutators is the phase in which resources defined in YAML or Python + // are already loaded. + // + // At this stage, we execute Python code to mutate resources defined in YAML or Python. + // + // During this process, Python code can access: + // - selected deployment target + // - bundle variable values + // - variables provided through CLI argument or environment variables + // + // The following is not available: + // - variables referencing other variables are in unresolved format + // + // Python code can output YAML referencing variables, and CLI should resolve them. + // + // Resources can't be added or removed, and CLI rejects such changes. Python code is + // allowed to modify existing resources, but not other parts of bundle configuration. + PythonMutatorPhaseApplyMutators phase = "apply_mutators" ) type pythonMutator struct { @@ -76,18 +121,75 @@ func (m *pythonMutator) Name() string { return fmt.Sprintf("PythonMutator(%s)", m.phase) } -func getExperimental(b *bundle.Bundle) config.Experimental { - if b.Config.Experimental == nil { - return config.Experimental{} +// opts is a common structure for deprecated PyDABs and upcoming Python +// configuration sections +type opts struct { + enabled bool + + venvPath string + + loadLocations bool +} + +type runPythonMutatorOpts struct { + cacheDir string + bundleRootPath string + pythonPath string + loadLocations bool +} + +// getOpts adapts deprecated PyDABs and upcoming Python configuration +// into a common structure. 
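A minimal sketch of the adapter described above (not part of this change), assuming only the config.Python and config.PyDABs fields referenced in this diff: the 'python' section enables the mutator only for the load_resources/apply_mutators phases, and configuring both sections at once is rejected. The venv path and test name are illustrative assumptions.

package python

import (
	"testing"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Hypothetical example of phase gating and the mutual-exclusivity check.
func TestGetOptsExample(t *testing.T) {
	b := &bundle.Bundle{
		Config: config.Root{
			Experimental: &config.Experimental{
				Python: config.Python{VEnvPath: ".venv"},
			},
		},
	}

	// The 'python' section runs in the new phases...
	opts, err := getOpts(b, PythonMutatorPhaseLoadResources)
	require.NoError(t, err)
	assert.True(t, opts.enabled)
	assert.Equal(t, ".venv", opts.venvPath)

	// ...but not in the deprecated PyDABs phases.
	opts, err = getOpts(b, PythonMutatorPhaseInit)
	require.NoError(t, err)
	assert.False(t, opts.enabled)

	// Enabling both sections is ambiguous and returns an error.
	b.Config.Experimental.PyDABs = config.PyDABs{Enabled: true, VEnvPath: ".venv"}
	_, err = getOpts(b, PythonMutatorPhaseLoadResources)
	assert.Error(t, err)
}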
+func getOpts(b *bundle.Bundle, phase phase) (opts, error) { + experimental := b.Config.Experimental + if experimental == nil { + return opts{}, nil } - return *b.Config.Experimental + // using reflect.DeepEquals in case we add more fields + pydabsEnabled := !reflect.DeepEqual(experimental.PyDABs, config.PyDABs{}) + pythonEnabled := !reflect.DeepEqual(experimental.Python, config.Python{}) + + if pydabsEnabled && pythonEnabled { + return opts{}, errors.New("both experimental/pydabs and experimental/python are enabled, only one can be enabled") + } else if pydabsEnabled { + if !experimental.PyDABs.Enabled { + return opts{}, nil + } + + // don't execute for phases for 'python' section + if phase == PythonMutatorPhaseInit || phase == PythonMutatorPhaseLoad { + return opts{ + enabled: true, + venvPath: experimental.PyDABs.VEnvPath, + loadLocations: false, // not supported in PyDABs + }, nil + } else { + return opts{}, nil + } + } else if pythonEnabled { + // don't execute for phases for 'pydabs' section + if phase == PythonMutatorPhaseLoadResources || phase == PythonMutatorPhaseApplyMutators { + return opts{ + enabled: true, + venvPath: experimental.Python.VEnvPath, + loadLocations: true, + }, nil + } else { + return opts{}, nil + } + } else { + return opts{}, nil + } } func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - experimental := getExperimental(b) + opts, err := getOpts(b, m.phase) + if err != nil { + return diag.Errorf("failed to apply python mutator: %s", err) + } - if !experimental.PyDABs.Enabled { + if !opts.enabled { return nil } @@ -95,8 +197,8 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno var mutateDiags diag.Diagnostics mutateDiagsHasError := errors.New("unexpected error") - err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { - pythonPath, err := detectExecutable(ctx, experimental.PyDABs.VEnvPath) + err = b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { + pythonPath, err := detectExecutable(ctx, opts.venvPath) if err != nil { return dyn.InvalidValue, fmt.Errorf("failed to get Python interpreter path: %w", err) } @@ -106,7 +208,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + bundleRootPath: b.BundleRootPath, + pythonPath: pythonPath, + loadLocations: opts.loadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -137,7 +244,7 @@ func createCacheDir(ctx context.Context) (string, error) { // support the same env variable as in b.CacheDir if tempDir, exists := env.TempDir(ctx); exists { // use 'default' as target name - cacheDir := filepath.Join(tempDir, "default", "pydabs") + cacheDir := filepath.Join(tempDir, "default", "python") err := os.MkdirAll(cacheDir, 0o700) if err != nil { @@ -147,16 +254,17 @@ func createCacheDir(ctx context.Context) (string, error) { return cacheDir, nil } - return os.MkdirTemp("", "-pydabs") + return os.MkdirTemp("", "-python") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - 
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -169,6 +277,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -183,7 +295,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.bundleRootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -203,7 +315,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath } // process can fail without reporting errors in diagnostics file or creating it, for instance, - // venv doesn't have PyDABs library installed + // venv doesn't have 'databricks-bundles' library installed if processErr != nil { diagnostic := diag.Diagnostic{ Severity: diag.Error, @@ -219,23 +331,27 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings return output, pythonDiagnostics } -const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, -and that the wheel is installed in the Python environment: +const pythonInstallExplanation = `Ensure that 'databricks-bundles' is installed in Python environment: - $ .venv/bin/pip install -e . 
+ $ .venv/bin/pip install databricks-bundles If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: - pydabs: + python: venv_path: .venv ` @@ -245,9 +361,9 @@ or activate the environment before running CLI commands: func explainProcessErr(stderr string) string { // implemented in cpython/Lib/runpy.py and portable across Python 3.x, including pypy if strings.Contains(stderr, "Error while finding module specification for 'databricks.bundles.build'") { - summary := color.CyanString("Explanation: ") + "'databricks-pydabs' library is not installed in the Python environment.\n" + summary := color.CyanString("Explanation: ") + "'databricks-bundles' library is not installed in the Python environment.\n" - return stderr + "\n" + summary + "\n" + installExplanation + return stderr + "\n" + summary + "\n" + pythonInstallExplanation } return stderr @@ -264,7 +380,21 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0o600) } -func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. +func loadLocationsFile(locationsPath string) (*pythonLocations, error) { + locationsFile, err := os.Open(locationsPath) + if errors.Is(err, fs.ErrNotExist) { + return newPythonLocations(), nil + } else if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(locationsFile) +} + +func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -272,15 +402,19 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { defer outputFile.Close() + return loadOutput(rootPath, outputFile, locations) +} + +func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. 
// - // virtualPath has to stay in rootPath, because locations outside root path are not allowed: + // virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed: // - // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path + // Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -290,7 +424,29 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + // + // we can remove this once we: + // - add variable interpolation before and after PythonMutator + // - implement path normalization (aka path normal form) + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { @@ -334,19 +490,23 @@ func loadDiagnosticsFile(path string) (diag.Diagnostics, error) { func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) { switch phase { case PythonMutatorPhaseLoad: - return createLoadOverrideVisitor(ctx), nil + return createLoadResourcesOverrideVisitor(ctx), nil case PythonMutatorPhaseInit: - return createInitOverrideVisitor(ctx), nil + return createInitOverrideVisitor(ctx, insertResourceModeAllow), nil + case PythonMutatorPhaseLoadResources: + return createLoadResourcesOverrideVisitor(ctx), nil + case PythonMutatorPhaseApplyMutators: + return createInitOverrideVisitor(ctx, insertResourceModeDisallow), nil default: return merge.OverrideVisitor{}, fmt.Errorf("unknown phase: %s", phase) } } -// createLoadOverrideVisitor creates an override visitor for the load phase. +// createLoadResourcesOverrideVisitor creates an override visitor for the load_resources phase. // -// During load, it's only possible to create new resources, and not modify or +// During load_resources, it's only possible to create new resources, and not modify or // delete existing ones. 
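Stepping back to the locations handling above: loadLocationsFile reads an optional locations.json written by the Python side, with one JSON object per line mapping a config path to its Python source position (field names follow the fixture in python_mutator_test.go below). The struct and decode loop here are an illustrative sketch, not the package's parsePythonLocations:

package python

import (
	"encoding/json"
	"errors"
	"io"
)

// pythonLocationEntry mirrors one line of locations.json as used in the tests,
// e.g. {"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5}.
// Illustrative only; the real type and parser live in parsePythonLocations.
type pythonLocationEntry struct {
	Path   string `json:"path"`
	File   string `json:"file"`
	Line   int    `json:"line"`
	Column int    `json:"column"`
}

func decodeLocations(r io.Reader) ([]pythonLocationEntry, error) {
	var entries []pythonLocationEntry
	dec := json.NewDecoder(r)
	for {
		var e pythonLocationEntry
		if err := dec.Decode(&e); errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}
		entries = append(entries, e)
	}
	return entries, nil
}

Each decoded entry then presumably maps a config path to a dyn.Location, which is what mergePythonLocations attaches to the generated values. The override visitor for this phase follows.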
-func createLoadOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
+func createLoadResourcesOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
 	resourcesPath := dyn.NewPath(dyn.Key("resources"))
 	jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))
 
@@ -385,11 +545,21 @@ func createLoadOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
 	}
 }
 
+// insertResourceMode controls whether createInitOverrideVisitor allows or disallows inserting new resources.
+type insertResourceMode int
+
+const (
+	insertResourceModeDisallow insertResourceMode = iota
+	insertResourceModeAllow    insertResourceMode = iota
+)
+
 // createInitOverrideVisitor creates an override visitor for the init phase.
 //
 // During the init phase it's possible to create new resources, modify existing
 // resources, but not delete existing resources.
-func createInitOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
+//
+// If mode is insertResourceModeDisallow, inserting new resources is rejected, matching the expected behaviour of apply_mutators.
+func createInitOverrideVisitor(ctx context.Context, mode insertResourceMode) merge.OverrideVisitor {
 	resourcesPath := dyn.NewPath(dyn.Key("resources"))
 	jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))
 
@@ -424,6 +594,11 @@ func createInitOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
 			return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
 		}
 
+		insertResource := len(valuePath) == len(jobsPath)+1
+		if mode == insertResourceModeDisallow && insertResource {
+			return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
+		}
+
 		log.Debugf(ctx, "Insert value at %q", valuePath.String())
 
 		return right, nil
@@ -441,9 +616,9 @@ func createInitOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
 }
 
 func isOmitemptyDelete(left dyn.Value) bool {
-	// PyDABs can omit empty sequences/mappings in output, because we don't track them as optional,
+	// Python output can omit empty sequences/mappings, because we don't track them as optional,
 	// there is no semantic difference between empty and missing, so we keep them as they were before
-	// PyDABs deleted them.
+	// the Python mutator deleted them.
switch left.Kind() { case dyn.KindMap: diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 8bdf91d03..322fb79e8 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -2,11 +2,11 @@ package python import ( "context" + "errors" "fmt" "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -39,13 +39,25 @@ func TestPythonMutator_Name_init(t *testing.T) { assert.Equal(t, "PythonMutator(init)", mutator.Name()) } -func TestPythonMutator_load(t *testing.T) { +func TestPythonMutator_Name_loadResources(t *testing.T) { + mutator := PythonMutator(PythonMutatorPhaseLoadResources) + + assert.Equal(t, "PythonMutator(load_resources)", mutator.Name()) +} + +func TestPythonMutator_Name_applyMutators(t *testing.T) { + mutator := PythonMutator(PythonMutatorPhaseApplyMutators) + + assert.Equal(t, "PythonMutator(apply_mutators)", mutator.Name()) +} + +func TestPythonMutator_loadResources(t *testing.T) { withFakeVEnv(t, ".venv") b := loadYaml("databricks.yml", ` experimental: - pydabs: - enabled: true + python: + resources: ["resources:load_resources"] venv_path: .venv resources: jobs: @@ -59,12 +71,12 @@ func TestPythonMutator_load(t *testing.T) { "-m", "databricks.bundles.build", "--phase", - "load", + "load_resources", }, `{ "experimental": { - "pydabs": { - "enabled": true, + "python": { + "resources": ["resources:load_resources"], "venv_path": ".venv" } }, @@ -80,9 +92,11 @@ func TestPythonMutator_load(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) - mutator := PythonMutator(PythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoadResources) diags := bundle.Apply(ctx, b, mutator) assert.NoError(t, diags.Error()) @@ -97,6 +111,25 @@ func TestPythonMutator_load(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: "src/examples/job1.py", + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, []dyn.Location{ @@ -108,13 +141,12 @@ func TestPythonMutator_load(t *testing.T) { }, diags[0].Locations) } -func TestPythonMutator_load_disallowed(t *testing.T) { +func TestPythonMutator_loadResources_disallowed(t *testing.T) { withFakeVEnv(t, ".venv") - b := loadYaml("databricks.yml", ` experimental: - pydabs: - enabled: true + python: + resources: ["resources:load_resources"] venv_path: .venv resources: jobs: @@ -128,12 +160,12 @@ func TestPythonMutator_load_disallowed(t *testing.T) { "-m", "databricks.bundles.build", "--phase", - "load", + "load_resources", }, `{ "experimental": { - "pydabs": { - "enabled": true, + "python": { + "resources": ["resources:load_resources"], "venv_path": ".venv" } }, @@ -145,22 +177,22 @@ func TestPythonMutator_load_disallowed(t *testing.T) { } } } - }`, 
"") + }`, "", "") - mutator := PythonMutator(PythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) assert.EqualError(t, diag.Error(), "unexpected change at \"resources.jobs.job0.description\" (insert)") } -func TestPythonMutator_init(t *testing.T) { +func TestPythonMutator_applyMutators(t *testing.T) { withFakeVEnv(t, ".venv") - b := loadYaml("databricks.yml", ` experimental: - pydabs: - enabled: true + python: venv_path: .venv + mutators: + - "mutators:add_description" resources: jobs: job0: @@ -173,13 +205,13 @@ func TestPythonMutator_init(t *testing.T) { "-m", "databricks.bundles.build", "--phase", - "init", + "apply_mutators", }, `{ "experimental": { - "pydabs": { - "enabled": true, - "venv_path": ".venv" + "python": { + "venv_path": ".venv", + "mutators": ["mutators:add_description"] } }, "resources": { @@ -190,9 +222,9 @@ func TestPythonMutator_init(t *testing.T) { } } } - }`, "") + }`, "", "") - mutator := PythonMutator(PythonMutatorPhaseInit) + mutator := PythonMutator(PythonMutatorPhaseApplyMutators) diag := bundle.Apply(ctx, b, mutator) assert.NoError(t, diag.Error()) @@ -207,12 +239,12 @@ func TestPythonMutator_init(t *testing.T) { require.NoError(t, err) assert.Equal(t, "databricks.yml", name.Location().File) - // 'description' was updated by PyDABs and has location of generated file until + // 'description' was updated by Python code and has location of generated file until // we implement source maps description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description")) require.NoError(t, err) - expectedVirtualPath, err := filepath.Abs("__generated_by_pydabs__.yml") + expectedVirtualPath, err := filepath.Abs(generatedFileName) require.NoError(t, err) assert.Equal(t, expectedVirtualPath, description.Location().File) @@ -223,12 +255,12 @@ func TestPythonMutator_init(t *testing.T) { func TestPythonMutator_badOutput(t *testing.T) { withFakeVEnv(t, ".venv") - b := loadYaml("databricks.yml", ` experimental: - pydabs: - enabled: true + python: venv_path: .venv + resources: + - "resources:load_resources" resources: jobs: job0: @@ -241,7 +273,7 @@ func TestPythonMutator_badOutput(t *testing.T) { "-m", "databricks.bundles.build", "--phase", - "load", + "load_resources", }, `{ "resources": { @@ -251,9 +283,9 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") - mutator := PythonMutator(PythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) assert.EqualError(t, diag.Error(), "unknown field: unknown_property") @@ -269,34 +301,63 @@ func TestPythonMutator_disabled(t *testing.T) { assert.NoError(t, diag.Error()) } -func TestPythonMutator_venvRequired(t *testing.T) { - b := loadYaml("databricks.yml", ` - experimental: - pydabs: - enabled: true`) - - ctx := context.Background() - mutator := PythonMutator(PythonMutatorPhaseLoad) - diag := bundle.Apply(ctx, b, mutator) - - assert.Error(t, diag.Error(), "\"experimental.enable_pydabs\" is enabled, but \"experimental.venv.path\" is not set") -} - func TestPythonMutator_venvNotFound(t *testing.T) { expectedError := fmt.Sprintf("failed to get Python interpreter path: can't find %q, check if virtualenv is created", interpreterPath("bad_path")) b := loadYaml("databricks.yml", ` experimental: - pydabs: - enabled: true - venv_path: bad_path`) + python: + venv_path: bad_path + resources: + - "resources:load_resources"`) - mutator := 
PythonMutator(PythonMutatorPhaseInit) + mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(context.Background(), b, mutator) assert.EqualError(t, diag.Error(), expectedError) } +func TestGetOps_Python(t *testing.T) { + actual, err := getOpts(&bundle.Bundle{ + Config: config.Root{ + Experimental: &config.Experimental{ + Python: config.Python{ + VEnvPath: ".venv", + Resources: []string{ + "resources:load_resources", + }, + }, + }, + }, + }, PythonMutatorPhaseLoadResources) + + assert.NoError(t, err) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: true}, actual) +} + +func TestGetOps_PyDABs(t *testing.T) { + actual, err := getOpts(&bundle.Bundle{ + Config: config.Root{ + Experimental: &config.Experimental{ + PyDABs: config.PyDABs{ + VEnvPath: ".venv", + Enabled: true, + }, + }, + }, + }, PythonMutatorPhaseInit) + + assert.NoError(t, err) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: false}, actual) +} + +func TestGetOps_empty(t *testing.T) { + actual, err := getOpts(&bundle.Bundle{}, PythonMutatorPhaseLoadResources) + + assert.NoError(t, err) + assert.Equal(t, opts{enabled: false}, actual) +} + type createOverrideVisitorTestCase struct { name string updatePath dyn.Path @@ -314,48 +375,48 @@ func TestCreateOverrideVisitor(t *testing.T) { testCases := []createOverrideVisitorTestCase{ { - name: "load: can't change an existing job", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can't change an existing job", + phase: PythonMutatorPhaseLoadResources, updatePath: dyn.MustPathFromString("resources.jobs.job0.name"), deletePath: dyn.MustPathFromString("resources.jobs.job0.name"), insertPath: dyn.MustPathFromString("resources.jobs.job0.name"), - deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0.name\" (delete)"), - insertError: fmt.Errorf("unexpected change at \"resources.jobs.job0.name\" (insert)"), - updateError: fmt.Errorf("unexpected change at \"resources.jobs.job0.name\" (update)"), + deleteError: errors.New("unexpected change at \"resources.jobs.job0.name\" (delete)"), + insertError: errors.New("unexpected change at \"resources.jobs.job0.name\" (insert)"), + updateError: errors.New("unexpected change at \"resources.jobs.job0.name\" (update)"), }, { - name: "load: can't delete an existing job", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can't delete an existing job", + phase: PythonMutatorPhaseLoadResources, deletePath: dyn.MustPathFromString("resources.jobs.job0"), - deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"), + deleteError: errors.New("unexpected change at \"resources.jobs.job0\" (delete)"), }, { - name: "load: can insert 'resources'", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can insert 'resources'", + phase: PythonMutatorPhaseLoadResources, insertPath: dyn.MustPathFromString("resources"), insertError: nil, }, { - name: "load: can insert 'resources.jobs'", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can insert 'resources.jobs'", + phase: PythonMutatorPhaseLoadResources, insertPath: dyn.MustPathFromString("resources.jobs"), insertError: nil, }, { - name: "load: can insert a job", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can insert a job", + phase: PythonMutatorPhaseLoadResources, insertPath: dyn.MustPathFromString("resources.jobs.job0"), insertError: nil, }, { - name: "load: can't change include", - phase: PythonMutatorPhaseLoad, + name: "load_resources: can't change include", + phase: 
PythonMutatorPhaseLoadResources, deletePath: dyn.MustPathFromString("include[0]"), insertPath: dyn.MustPathFromString("include[0]"), updatePath: dyn.MustPathFromString("include[0]"), - deleteError: fmt.Errorf("unexpected change at \"include[0]\" (delete)"), - insertError: fmt.Errorf("unexpected change at \"include[0]\" (insert)"), - updateError: fmt.Errorf("unexpected change at \"include[0]\" (update)"), + deleteError: errors.New("unexpected change at \"include[0]\" (delete)"), + insertError: errors.New("unexpected change at \"include[0]\" (insert)"), + updateError: errors.New("unexpected change at \"include[0]\" (update)"), }, { name: "init: can change an existing job", @@ -371,7 +432,7 @@ func TestCreateOverrideVisitor(t *testing.T) { name: "init: can't delete an existing job", phase: PythonMutatorPhaseInit, deletePath: dyn.MustPathFromString("resources.jobs.job0"), - deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"), + deleteError: errors.New("unexpected change at \"resources.jobs.job0\" (delete)"), }, { name: "init: can insert 'resources'", @@ -397,9 +458,43 @@ func TestCreateOverrideVisitor(t *testing.T) { deletePath: dyn.MustPathFromString("include[0]"), insertPath: dyn.MustPathFromString("include[0]"), updatePath: dyn.MustPathFromString("include[0]"), - deleteError: fmt.Errorf("unexpected change at \"include[0]\" (delete)"), - insertError: fmt.Errorf("unexpected change at \"include[0]\" (insert)"), - updateError: fmt.Errorf("unexpected change at \"include[0]\" (update)"), + deleteError: errors.New("unexpected change at \"include[0]\" (delete)"), + insertError: errors.New("unexpected change at \"include[0]\" (insert)"), + updateError: errors.New("unexpected change at \"include[0]\" (update)"), + }, + { + name: "apply_mutators: can't delete an existing job", + phase: PythonMutatorPhaseInit, + deletePath: dyn.MustPathFromString("resources.jobs.job0"), + deleteError: errors.New("unexpected change at \"resources.jobs.job0\" (delete)"), + }, + { + name: "apply_mutators: can insert 'resources'", + phase: PythonMutatorPhaseApplyMutators, + insertPath: dyn.MustPathFromString("resources"), + insertError: nil, + }, + { + name: "apply_mutators: can insert 'resources.jobs'", + phase: PythonMutatorPhaseApplyMutators, + insertPath: dyn.MustPathFromString("resources.jobs"), + insertError: nil, + }, + { + name: "apply_mutators: can't insert a job", + phase: PythonMutatorPhaseApplyMutators, + insertPath: dyn.MustPathFromString("resources.jobs.job0"), + insertError: errors.New("unexpected change at \"resources.jobs.job0\" (insert)"), + }, + { + name: "apply_mutators: can't change include", + phase: PythonMutatorPhaseApplyMutators, + deletePath: dyn.MustPathFromString("include[0]"), + insertPath: dyn.MustPathFromString("include[0]"), + updatePath: dyn.MustPathFromString("include[0]"), + deleteError: errors.New("unexpected change at \"include[0]\" (delete)"), + insertError: errors.New("unexpected change at \"include[0]\" (insert)"), + updateError: errors.New("unexpected change at \"include[0]\" (update)"), }, } @@ -458,9 +553,9 @@ type overrideVisitorOmitemptyTestCase struct { } func TestCreateOverrideVisitor_omitempty(t *testing.T) { - // PyDABs can omit empty sequences/mappings in output, because we don't track them as optional, + // Python output can omit empty sequences/mappings in output, because we don't track them as optional, // there is no semantic difference between empty and missing, so we keep them as they were before - // PyDABs deleted them. 
+ // Python code deleted them. allPhases := []phase{PythonMutatorPhaseLoad, PythonMutatorPhaseInit} location := dyn.Location{ @@ -567,18 +662,17 @@ func TestExplainProcessErr(t *testing.T) { stderr := "/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')\n" expected := `/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') -Explanation: 'databricks-pydabs' library is not installed in the Python environment. +Explanation: 'databricks-bundles' library is not installed in the Python environment. -If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, -and that the wheel is installed in the Python environment: +Ensure that 'databricks-bundles' is installed in Python environment: - $ .venv/bin/pip install -e . + $ .venv/bin/pip install databricks-bundles If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: - pydabs: + python: venv_path: .venv ` @@ -587,7 +681,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output, diagnostics, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -599,32 +693,51 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0o600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0o600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, []byte(output), 0o600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := range args { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content)) diff --git 
a/bundle/config/mutator/resolve_resource_references.go b/bundle/config/mutator/resolve_resource_references.go index bf902f928..20a5b6585 100644 --- a/bundle/config/mutator/resolve_resource_references.go +++ b/bundle/config/mutator/resolve_resource_references.go @@ -40,6 +40,7 @@ func (m *resolveResourceReferences) Apply(ctx context.Context, b *bundle.Bundle) }) } + // Note, diags are lost from all goroutines except the first one to return diag return diag.FromErr(errs.Wait()) } diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 8c207e375..9aa93791f 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -2,9 +2,11 @@ package mutator import ( "context" + "errors" "fmt" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" @@ -12,15 +14,37 @@ import ( "github.com/databricks/cli/libs/dyn/dynvar" ) +/* +For pathological cases, output and time grow exponentially. + +On my laptop, timings for acceptance/bundle/variables/complex-cycle: +rounds time + + 9 0.10s + 10 0.13s + 11 0.27s + 12 0.68s + 13 1.98s + 14 6.28s + 15 21.70s + 16 78.16s +*/ +const maxResolutionRounds = 11 + type resolveVariableReferences struct { - prefixes []string - pattern dyn.Pattern - lookupFn func(dyn.Value, dyn.Path) (dyn.Value, error) - skipFn func(dyn.Value) bool + prefixes []string + pattern dyn.Pattern + lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) + skipFn func(dyn.Value) bool + extraRounds int } func ResolveVariableReferences(prefixes ...string) bundle.Mutator { - return &resolveVariableReferences{prefixes: prefixes, lookupFn: lookup} + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + } } func ResolveVariableReferencesInLookup() bundle.Mutator { @@ -31,61 +55,21 @@ func ResolveVariableReferencesInLookup() bundle.Mutator { }, pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("lookup")), lookupFn: lookupForVariables} } -func ResolveVariableReferencesInComplexVariables() bundle.Mutator { - return &resolveVariableReferences{ - prefixes: []string{ - "bundle", - "workspace", - "variables", - }, - pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("value")), - lookupFn: lookupForComplexVariables, - skipFn: skipResolvingInNonComplexVariables, +func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { + if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + if path.String() == "workspace.file_path" { + return dyn.V(b.SyncRootPath), nil + } } -} - -func lookup(v dyn.Value, path dyn.Path) (dyn.Value, error) { // Future opportunity: if we lookup this path in both the given root // and the synthesized root, we know if it was explicitly set or implied to be empty. // Then we can emit a warning if it was not explicitly set. 
return dyn.GetByPath(v, path) } -func lookupForComplexVariables(v dyn.Value, path dyn.Path) (dyn.Value, error) { +func lookupForVariables(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if path[0].Key() != "variables" { - return lookup(v, path) - } - - varV, err := dyn.GetByPath(v, path[:len(path)-1]) - if err != nil { - return dyn.InvalidValue, err - } - - var vv variable.Variable - err = convert.ToTyped(&vv, varV) - if err != nil { - return dyn.InvalidValue, err - } - - if vv.Type == variable.VariableTypeComplex { - return dyn.InvalidValue, fmt.Errorf("complex variables cannot contain references to another complex variables") - } - - return lookup(v, path) -} - -func skipResolvingInNonComplexVariables(v dyn.Value) bool { - switch v.Kind() { - case dyn.KindMap, dyn.KindSequence: - return false - default: - return true - } -} - -func lookupForVariables(v dyn.Value, path dyn.Path) (dyn.Value, error) { - if path[0].Key() != "variables" { - return lookup(v, path) + return lookup(v, path, b) } varV, err := dyn.GetByPath(v, path[:len(path)-1]) @@ -100,10 +84,10 @@ func lookupForVariables(v dyn.Value, path dyn.Path) (dyn.Value, error) { } if vv.Lookup != nil && vv.Lookup.String() != "" { - return dyn.InvalidValue, fmt.Errorf("lookup variables cannot contain references to another lookup variables") + return dyn.InvalidValue, errors.New("lookup variables cannot contain references to another lookup variables") } - return lookup(v, path) + return lookup(v, path, b) } func (*resolveVariableReferences) Name() string { @@ -125,6 +109,36 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) varPath := dyn.NewPath(dyn.Key("var")) var diags diag.Diagnostics + maxRounds := 1 + m.extraRounds + + for round := range maxRounds { + hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + + diags = diags.Extend(newDiags) + + if diags.HasError() { + break + } + + if !hasUpdates { + break + } + + if round >= maxRounds-1 { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("Detected unresolved variables after %d resolution rounds", round+1), + // Would be nice to include names of the variables there, but that would complicate things more + }) + break + } + } + return diags +} + +func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn.Path, varPath dyn.Path) (bool, diag.Diagnostics) { + var diags diag.Diagnostics + hasUpdates := false err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. 
@@ -167,7 +181,8 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if m.skipFn != nil && m.skipFn(v) { return dyn.InvalidValue, dynvar.ErrSkipResolution } - return m.lookupFn(normalized, path) + hasUpdates = true + return m.lookupFn(normalized, path, b) } } @@ -187,5 +202,6 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if err != nil { diags = diags.Extend(diag.FromErr(err)) } - return diags + + return hasUpdates, diags } diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 7bb6f11a0..44f6c8dbb 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -7,430 +7,60 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/config/variable" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" - "github.com/stretchr/testify/assert" + "github.com/databricks/databricks-sdk-go/service/pipelines" "github.com/stretchr/testify/require" ) -func TestResolveVariableReferences(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", +func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { + testCases := []struct { + enabled bool + assert func(t *testing.T, b *bundle.Bundle) + }{ + { + true, + func(t *testing.T, b *bundle.Bundle) { + // Variables that use workspace file path should have SyncRootValue during resolution phase + require.Equal(t, "sync/root/path", b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Configuration["source"]) + + // The file path itself should remain the same + require.Equal(t, "file/path", b.Config.Workspace.FilePath) }, - Workspace: config.Workspace{ - RootPath: "${bundle.name}/bar", - FilePath: "${workspace.root_path}/baz", + }, + { + false, + func(t *testing.T, b *bundle.Bundle) { + require.Equal(t, "file/path", b.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Configuration["source"]) + require.Equal(t, "file/path", b.Config.Workspace.FilePath) }, }, } - // Apply with an invalid prefix. This should not change the workspace root path. - diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("doesntexist")) - require.NoError(t, diags.Error()) - require.Equal(t, "${bundle.name}/bar", b.Config.Workspace.RootPath) - require.Equal(t, "${workspace.root_path}/baz", b.Config.Workspace.FilePath) - - // Apply with a valid prefix. This should change the workspace root path. 
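The round loop above bounds the fixed-point iteration described in the maxResolutionRounds comment: each pass substitutes one layer of references, so chains of variables converge after a few rounds, while cycles keep producing updates until the cap is hit and the warning is emitted. A standalone toy model of that behaviour (not the CLI's resolver; variable names and the regex are made up for illustration):

package main

import (
	"fmt"
	"regexp"
)

// Toy model of round-based variable resolution: each round substitutes one
// layer of "${var.x}" references. A chain such as a -> b -> c settles after a
// few rounds; a cycle would keep reporting updates until the round cap.
func main() {
	vars := map[string]string{
		"a": "${var.b}/suffix",
		"b": "${var.c}",
		"c": "literal",
	}
	ref := regexp.MustCompile(`\$\{var\.([a-z_]+)\}`)

	const maxRounds = 11
	for round := 1; round <= maxRounds; round++ {
		updated := false
		for name, value := range vars {
			vars[name] = ref.ReplaceAllStringFunc(value, func(m string) string {
				key := ref.FindStringSubmatch(m)[1]
				if replacement, ok := vars[key]; ok {
					updated = true
					return replacement
				}
				return m
			})
		}
		fmt.Printf("round %d: %v\n", round, vars)
		if !updated {
			// Fixed point reached: nothing left to substitute.
			break
		}
	}
}

In the real mutator the per-round work is resolveOnce below, and an unresolvable cycle surfaces as the "Detected unresolved variables" warning added above.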
- diags = bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle", "workspace")) - require.NoError(t, diags.Error()) - require.Equal(t, "example/bar", b.Config.Workspace.RootPath) - require.Equal(t, "example/bar/baz", b.Config.Workspace.FilePath) -} - -func TestResolveVariableReferencesToBundleVariables(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Workspace: config.Workspace{ - RootPath: "${bundle.name}/${var.foo}", - }, - Variables: map[string]*variable.Variable{ - "foo": { - Value: "bar", + for _, testCase := range testCases { + b := &bundle.Bundle{ + SyncRootPath: "sync/root/path", + Config: config.Root{ + Presets: config.Presets{ + SourceLinkedDeployment: &testCase.enabled, }, - }, - }, - } - - // Apply with a valid prefix. This should change the workspace root path. - diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "example/bar", b.Config.Workspace.RootPath) -} - -func TestResolveVariableReferencesToEmptyFields(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - Git: config.Git{ - Branch: "", + Workspace: config.Workspace{ + FilePath: "file/path", }, - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - Tags: map[string]string{ - "git_branch": "${bundle.git.branch}", - }, - }, - }, - }, - }, - }, - } - - // Apply for the bundle prefix. - diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle")) - require.NoError(t, diags.Error()) - - // The job settings should have been interpolated to an empty string. - require.Equal(t, "", b.Config.Resources.Jobs["job1"].JobSettings.Tags["git_branch"]) -} - -func TestResolveVariableReferencesForPrimitiveNonStringFields(t *testing.T) { - var diags diag.Diagnostics - - b := &bundle.Bundle{ - Config: config.Root{ - Variables: map[string]*variable.Variable{ - "no_alert_for_canceled_runs": {}, - "no_alert_for_skipped_runs": {}, - "min_workers": {}, - "max_workers": {}, - "spot_bid_max_price": {}, - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - NotificationSettings: &jobs.JobNotificationSettings{ - NoAlertForCanceledRuns: false, - NoAlertForSkippedRuns: false, - }, - Tasks: []jobs.Task{ - { - NewCluster: &compute.ClusterSpec{ - Autoscale: &compute.AutoScale{ - MinWorkers: 0, - MaxWorkers: 0, - }, - AzureAttributes: &compute.AzureAttributes{ - SpotBidMaxPrice: 0.0, - }, - }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Configuration: map[string]string{ + "source": "${workspace.file_path}", }, }, }, }, }, }, - }, + } + + diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("workspace")) + require.NoError(t, diags.Error()) + testCase.assert(t, b) } - - ctx := context.Background() - - // Initialize the variables. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.InitializeVariables([]string{ - "no_alert_for_canceled_runs=true", - "no_alert_for_skipped_runs=true", - "min_workers=1", - "max_workers=2", - "spot_bid_max_price=0.5", - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Assign the variables to the dynamic configuration. 
- diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - // Set the notification settings. - p = dyn.MustPathFromString("resources.jobs.job1.notification_settings") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_canceled_runs")), dyn.V("${var.no_alert_for_canceled_runs}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_skipped_runs")), dyn.V("${var.no_alert_for_skipped_runs}")) - require.NoError(t, err) - - // Set the min and max workers. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.autoscale") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("min_workers")), dyn.V("${var.min_workers}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("max_workers")), dyn.V("${var.max_workers}")) - require.NoError(t, err) - - // Set the spot bid max price. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.azure_attributes") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("spot_bid_max_price")), dyn.V("${var.spot_bid_max_price}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Apply for the variable prefix. This should resolve the variables to their values. - diags = bundle.Apply(context.Background(), b, ResolveVariableReferences("variables")) - require.NoError(t, diags.Error()) - assert.Equal(t, true, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForCanceledRuns) - assert.Equal(t, true, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForSkippedRuns) - assert.Equal(t, 1, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MinWorkers) - assert.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MaxWorkers) - assert.Equal(t, 0.5, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.AzureAttributes.SpotBidMaxPrice) -} - -func TestResolveComplexVariable(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. 
- diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, ResolveVariableReferences("bundle", "workspace", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NumWorkers) -} - -func TestResolveComplexVariableReferencesToFields(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0].new_cluster") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("node_type_id")), dyn.V("${var.cluster.node_type_id}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, ResolveVariableReferences("bundle", "workspace", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NodeTypeId) -} - -func TestResolveComplexVariableReferencesWithComplexVariablesError(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - "spark_conf": "${var.spark_conf}", - }, - Type: variable.VariableTypeComplex, - }, - "spark_conf": { - Value: map[string]any{ - "spark.executor.memory": "4g", - "spark.executor.cores": "2", - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. 
- diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq(ResolveVariableReferencesInComplexVariables(), ResolveVariableReferences("bundle", "workspace", "variables"))) - require.ErrorContains(t, diags.Error(), "complex variables cannot contain references to another complex variables") -} - -func TestResolveComplexVariableWithVarReference(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "package_version": { - Value: "1.0.0", - }, - "cluster_libraries": { - Value: [](map[string]any){ - { - "pypi": map[string]string{ - "package": "cicd_template==${var.package_version}", - }, - }, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{}, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("libraries")), dyn.V("${var.cluster_libraries}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq( - ResolveVariableReferencesInComplexVariables(), - ResolveVariableReferences("bundle", "workspace", "variables"), - )) - require.NoError(t, diags.Error()) - require.Equal(t, "cicd_template==1.0.0", b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[0].Pypi.Package) } diff --git a/bundle/config/mutator/rewrite_workspace_prefix_test.go b/bundle/config/mutator/rewrite_workspace_prefix_test.go index 48973a4cf..099738c02 100644 --- a/bundle/config/mutator/rewrite_workspace_prefix_test.go +++ b/bundle/config/mutator/rewrite_workspace_prefix_test.go @@ -71,7 +71,7 @@ func TestNoWorkspacePrefixUsed(t *testing.T) { } for _, d := range diags { - require.Equal(t, d.Severity, diag.Warning) + require.Equal(t, diag.Warning, d.Severity) require.Contains(t, expectedErrors, d.Summary) delete(expectedErrors, d.Summary) } diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go index 7ffd782c2..3d7391b01 100644 --- a/bundle/config/mutator/run_as.go +++ b/bundle/config/mutator/run_as.go @@ -119,6 +119,16 @@ func validateRunAs(b *bundle.Bundle) diag.Diagnostics { )) } + // Apps do not support run_as in the API. 
+ if len(b.Config.Resources.Apps) > 0 { + diags = diags.Extend(reportRunAsNotSupported( + "apps", + b.Config.GetLocation("resources.apps"), + b.Config.Workspace.CurrentUser.UserName, + identity, + )) + } + return diags } diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index dbf4bf806..650b65d61 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -32,6 +32,7 @@ func allResourceTypes(t *testing.T) []string { // the dyn library gives us the correct list of all resources supported. Please // also update this check when adding a new resource require.Equal(t, []string{ + "apps", "clusters", "dashboards", "experiments", @@ -104,47 +105,47 @@ func TestRunAsWorksForAllowedResources(t *testing.T) { } } -func TestRunAsErrorForUnsupportedResources(t *testing.T) { - // Bundle "run_as" has two modes of operation, each with a different set of - // resources that are supported. - // Cases: - // 1. When the bundle "run_as" identity is same as the current deployment - // identity. In this case all resources are supported. - // 2. When the bundle "run_as" identity is different from the current - // deployment identity. In this case only a subset of resources are - // supported. This subset of resources are defined in the allow list below. - // - // To be a part of the allow list, the resource must satisfy one of the following - // two conditions: - // 1. The resource supports setting a run_as identity to a different user - // from the owner/creator of the resource. For example, jobs. - // 2. Run as semantics do not apply to the resource. We do not plan to add - // platform side support for `run_as` for these resources. For example, - // experiments or registered models. - // - // Any resource that is not on the allow list cannot be used when the bundle - // run_as is different from the current deployment user. "bundle validate" must - // return an error if such a resource has been defined, and the run_as identity - // is different from the current deployment identity. - // - // Action Item: If you are adding a new resource to DABs, please check in with - // the relevant owning team whether the resource should be on the allow list or (implicitly) on - // the deny list. Any resources that could have run_as semantics in the future - // should be on the deny list. - // For example: Teams for pipelines, model serving endpoints or Lakeview dashboards - // are planning to add platform side support for `run_as` for these resources at - // some point in the future. These resources are (implicitly) on the deny list, since - // they are not on the allow list below. - allowList := []string{ - "clusters", - "jobs", - "models", - "registered_models", - "experiments", - "schemas", - "volumes", - } +// Bundle "run_as" has two modes of operation, each with a different set of +// resources that are supported. +// Cases: +// 1. When the bundle "run_as" identity is same as the current deployment +// identity. In this case all resources are supported. +// 2. When the bundle "run_as" identity is different from the current +// deployment identity. In this case only a subset of resources are +// supported. This subset of resources are defined in the allow list below. +// +// To be a part of the allow list, the resource must satisfy one of the following +// two conditions: +// 1. The resource supports setting a run_as identity to a different user +// from the owner/creator of the resource. For example, jobs. +// 2. 
Run as semantics do not apply to the resource. We do not plan to add +// platform side support for `run_as` for these resources. For example, +// experiments or registered models. +// +// Any resource that is not on the allow list cannot be used when the bundle +// run_as is different from the current deployment user. "bundle validate" must +// return an error if such a resource has been defined, and the run_as identity +// is different from the current deployment identity. +// +// Action Item: If you are adding a new resource to DABs, please check in with +// the relevant owning team whether the resource should be on the allow list or (implicitly) on +// the deny list. Any resources that could have run_as semantics in the future +// should be on the deny list. +// For example: Teams for pipelines, model serving endpoints or Lakeview dashboards +// are planning to add platform side support for `run_as` for these resources at +// some point in the future. These resources are (implicitly) on the deny list, since +// they are not on the allow list below. +var allowList = []string{ + "clusters", + "jobs", + "models", + "registered_models", + "experiments", + "schemas", + "volumes", +} +func TestRunAsErrorForUnsupportedResources(t *testing.T) { base := config.Root{ Workspace: config.Workspace{ CurrentUser: &config.User{ @@ -197,3 +198,54 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { "See https://docs.databricks.com/dev-tools/bundles/run-as.html to learn more about the run_as property.", rt) } } + +func TestRunAsNoErrorForSupportedResources(t *testing.T) { + base := config.Root{ + Workspace: config.Workspace{ + CurrentUser: &config.User{ + User: &iam.User{ + UserName: "alice", + }, + }, + }, + RunAs: &jobs.JobRunAs{ + UserName: "bob", + }, + } + + v, err := convert.FromTyped(base, dyn.NilValue) + require.NoError(t, err) + + // Define top level resources key in the bundle configuration. + // This is not part of the typed configuration, so we need to add it manually. + v, err = dyn.Set(v, "resources", dyn.V(map[string]dyn.Value{})) + require.NoError(t, err) + + for _, rt := range allResourceTypes(t) { + // Skip unsupported resources + if !slices.Contains(allowList, rt) { + continue + } + + // Add an instance of the resource type that is not on the allow list to + // the bundle configuration. + nv, err := dyn.SetByPath(v, dyn.NewPath(dyn.Key("resources"), dyn.Key(rt)), dyn.V(map[string]dyn.Value{ + "foo": dyn.V(map[string]dyn.Value{ + "name": dyn.V("bar"), + }), + })) + require.NoError(t, err) + + // Get back typed configuration from the newly created invalid bundle configuration. + r := &config.Root{} + err = convert.ToTyped(r, nv) + require.NoError(t, err) + + // Assert this configuration passes validation. + b := &bundle.Bundle{ + Config: *r, + } + diags := bundle.Apply(context.Background(), b, SetRunAs()) + require.NoError(t, diags.Error()) + } +} diff --git a/bundle/config/mutator/select_target.go b/bundle/config/mutator/select_target.go index 178686b6e..ce18da4f5 100644 --- a/bundle/config/mutator/select_target.go +++ b/bundle/config/mutator/select_target.go @@ -15,6 +15,7 @@ type selectTarget struct { } // SelectTarget merges the specified target into the root configuration. +// After merging, it removes the 'Targets' section from the configuration. 
func SelectTarget(name string) bundle.Mutator { return &selectTarget{ name: name, @@ -31,7 +32,7 @@ func (m *selectTarget) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnosti } // Get specified target - _, ok := b.Config.Targets[m.name] + target, ok := b.Config.Targets[m.name] if !ok { return diag.Errorf("%s: no such target. Available targets: %s", m.name, strings.Join(maps.Keys(b.Config.Targets), ", ")) } @@ -43,13 +44,15 @@ func (m *selectTarget) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnosti } // Store specified target in configuration for reference. + b.Target = target b.Config.Bundle.Target = m.name // We do this for backward compatibility. // TODO: remove when Environments section is not supported anymore. b.Config.Bundle.Environment = b.Config.Bundle.Target - // Clear targets after loading. + // Cleanup the original targets and environments sections since they + // show up in the JSON output of the 'summary' and 'validate' commands. b.Config.Targets = nil b.Config.Environments = nil diff --git a/bundle/config/mutator/set_variables.go b/bundle/config/mutator/set_variables.go index 9e9f2dcfe..ac2f660a9 100644 --- a/bundle/config/mutator/set_variables.go +++ b/bundle/config/mutator/set_variables.go @@ -3,11 +3,14 @@ package mutator import ( "context" "fmt" + "os" + "path/filepath" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/jsonloader" "github.com/databricks/cli/libs/env" ) @@ -23,7 +26,11 @@ func (m *setVariables) Name() string { return "SetVariables" } -func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string) (dyn.Value, error) { +func getDefaultVariableFilePath(target string) string { + return ".databricks/bundle/" + target + "/variable-overrides.json" +} + +func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, error) { // case: variable already has value initialized, so skip if variable.HasValue() { return v, nil @@ -49,6 +56,26 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, return v, nil } + // case: Set the variable to the default value from the variable file + if fileDefault.Kind() != dyn.KindInvalid && fileDefault.Kind() != dyn.KindNil { + hasComplexType := variable.IsComplex() + hasComplexValue := fileDefault.Kind() == dyn.KindMap || fileDefault.Kind() == dyn.KindSequence + + if hasComplexType && !hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name) + } + if !hasComplexType && hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name) + } + + v, err := dyn.Set(v, "value", fileDefault) + if err != nil { + return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err) + } + + return v, nil + } + // case: Set the variable to its default value if variable.HasDefault() { vDefault, err := dyn.Get(v, "default") @@ -64,10 +91,43 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, } // We should have had a value to set for the variable at this point. - return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. 
Assignment can be done through the "--var" flag or by setting the %s environment variable`, name, bundleVarPrefix+name) + return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Assignment can be done using "--var", by setting the %s environment variable, or in %s file`, name, bundleVarPrefix+name, getDefaultVariableFilePath("")) +} + +func readVariablesFromFile(b *bundle.Bundle) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + + filePath := filepath.Join(b.BundleRootPath, getDefaultVariableFilePath(b.Config.Bundle.Target)) + if _, err := os.Stat(filePath); err != nil { + return dyn.InvalidValue, nil + } + + f, err := os.ReadFile(filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to read variables file: %w", err)) + } + + val, err := jsonloader.LoadJSON(f, filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse variables file %s: %w", filePath, err)) + } + + if val.Kind() != dyn.KindMap { + return dyn.InvalidValue, diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("failed to parse variables file %s: invalid format", filePath), + Detail: "Variables file must be a JSON object with the following format:\n{\"var1\": \"value1\", \"var2\": \"value2\"}", + }) + } + + return val, nil } func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + defaults, diags := readVariablesFromFile(b) + if diags.HasError() { + return diags + } err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "variables", dyn.Foreach(func(p dyn.Path, variable dyn.Value) (dyn.Value, error) { name := p[1].Key() @@ -76,9 +136,10 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos return dyn.InvalidValue, fmt.Errorf(`variable "%s" is not defined`, name) } - return setVariable(ctx, variable, v, name) + fileDefault, _ := dyn.Get(defaults, name) + return setVariable(ctx, variable, v, name, fileDefault) })) }) - return diag.FromErr(err) + return diags.Extend(diag.FromErr(err)) } diff --git a/bundle/config/mutator/set_variables_test.go b/bundle/config/mutator/set_variables_test.go index d9719793f..d904d5be3 100644 --- a/bundle/config/mutator/set_variables_test.go +++ b/bundle/config/mutator/set_variables_test.go @@ -25,12 +25,12 @@ func TestSetVariableFromProcessEnvVar(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) require.NoError(t, err) - assert.Equal(t, variable.Value, "process-env") + assert.Equal(t, "process-env", variable.Value) } func TestSetVariableUsingDefaultValue(t *testing.T) { @@ -43,12 +43,12 @@ func TestSetVariableUsingDefaultValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) require.NoError(t, err) - assert.Equal(t, variable.Value, "default") + assert.Equal(t, "default", variable.Value) } func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { @@ -65,12 +65,12 @@ func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) 
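The variable-overrides.json mechanism introduced above slots in between environment variables and declared defaults. A standalone sketch of that resolution order and of loading .databricks/bundle/&lt;target&gt;/variable-overrides.json, simplified to plain string values (the real code also supports complex variables and checks type compatibility):

    package main

    import (
        "encoding/json"
        "errors"
        "fmt"
        "io/fs"
        "os"
        "path/filepath"
    )

    // loadOverrides reads the per-target overrides file if it exists;
    // a missing file is not an error.
    func loadOverrides(root, target string) (map[string]string, error) {
        p := filepath.Join(root, ".databricks", "bundle", target, "variable-overrides.json")
        raw, err := os.ReadFile(p)
        if errors.Is(err, fs.ErrNotExist) {
            return map[string]string{}, nil
        }
        if err != nil {
            return nil, err
        }
        out := map[string]string{}
        if err := json.Unmarshal(raw, &out); err != nil {
            return nil, fmt.Errorf("failed to parse variables file %s: %w", p, err)
        }
        return out, nil
    }

    // resolve applies the precedence used above: an explicitly assigned value wins,
    // then the BUNDLE_VAR_<name> environment variable, then the overrides file,
    // then the default declared in the bundle configuration.
    func resolve(name, assigned, declaredDefault string, overrides map[string]string) (string, error) {
        if assigned != "" {
            return assigned, nil
        }
        if v, ok := os.LookupEnv("BUNDLE_VAR_" + name); ok {
            return v, nil
        }
        if v, ok := overrides[name]; ok {
            return v, nil
        }
        if declaredDefault != "" {
            return declaredDefault, nil
        }
        return "", fmt.Errorf("no value assigned to required variable %s", name)
    }

    func main() {
        overrides, _ := loadOverrides(".", "dev")
        v, err := resolve("warehouse_id", "", "", overrides)
        fmt.Println(v, err)
    }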
require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) require.NoError(t, err) - assert.Equal(t, variable.Value, "assigned-value") + assert.Equal(t, "assigned-value", variable.Value) } func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { @@ -90,12 +90,12 @@ func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) require.NoError(t, err) - assert.Equal(t, variable.Value, "assigned-value") + assert.Equal(t, "assigned-value", variable.Value) } func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { @@ -107,8 +107,8 @@ func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") - assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_foo environment variable") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) + assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done using \"--var\", by setting the BUNDLE_VAR_foo environment variable, or in .databricks/bundle//variable-overrides.json file") } func TestSetVariablesMutator(t *testing.T) { @@ -157,6 +157,6 @@ func TestSetComplexVariablesViaEnvVariablesIsNotAllowed(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) assert.ErrorContains(t, err, "setting via environment variables (BUNDLE_VAR_foo) is not supported for complex variable foo") } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index af0f94120..1eda578fa 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -6,6 +6,7 @@ import ( "fmt" "io/fs" "net/url" + "os" "path" "path/filepath" "strings" @@ -17,6 +18,47 @@ import ( "github.com/databricks/cli/libs/notebook" ) +// TranslateMode specifies how a path should be translated. +type TranslateMode int + +const ( + // TranslateModeNotebook translates a path to a remote notebook. + TranslateModeNotebook TranslateMode = iota + + // TranslateModeFile translates a path to a remote regular file. + TranslateModeFile + + // TranslateModeDirectory translates a path to a remote directory. + TranslateModeDirectory + + // TranslateModeLocalAbsoluteFile translates a path to the local absolute file path. + // It returns an error if the path does not exist or is a directory. + TranslateModeLocalAbsoluteFile + + // TranslateModeLocalAbsoluteDirectory translates a path to the local absolute directory path. + // It returns an error if the path does not exist or is not a directory. + TranslateModeLocalAbsoluteDirectory + + // TranslateModeLocalRelative translates a path to be relative to the bundle sync root path. 
+ // It does not check if the path exists, nor care if it is a file or directory. + TranslateModeLocalRelative + + // TranslateModeLocalRelativeWithPrefix translates a path to be relative to the bundle sync root path. + // It a "./" prefix to the path if it does not already have one. + // This allows for disambiguating between paths and PyPI package names. + TranslateModeLocalRelativeWithPrefix +) + +// translateOptions control path translation behavior. +type translateOptions struct { + // Mode specifies how the path should be translated. + Mode TranslateMode + + // AllowPathOutsideSyncRoot can be set for paths that are not tied to the sync root path. + // This is the case for artifact paths, for example. + AllowPathOutsideSyncRoot bool +} + type ErrIsNotebook struct { path string } @@ -44,8 +86,6 @@ func (m *translatePaths) Name() string { return "TranslatePaths" } -type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) (string, error) - // translateContext is a context for rewriting paths in a config. // It is freshly instantiated on every mutator apply call. // It provides access to the underlying bundle object such that @@ -56,77 +96,97 @@ type translateContext struct { // seen is a map of local paths to their corresponding remote paths. // If a local path has already been successfully resolved, we do not need to resolve it again. seen map[string]string + + // remoteRoot is the root path of the remote workspace. + // It is equal to ${workspace.file_path} for regular deployments. + // It points to the source root path for source-linked deployments. + remoteRoot string } // rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function // // It takes these arguments: -// - The argument `dir` is the directory relative to which the given relative path is. -// - The given relative path is both passed and written back through `*p`. -// - The argument `fn` is a function that performs the actual rewriting logic. -// This logic is different between regular files or notebooks. +// - The context in which the function is called. +// - The argument `dir` is the directory relative to which the relative path should be interpreted. +// - The argument `input` is the relative path to rewrite. +// - The argument `opts` is a struct that specifies how the path should be rewritten. +// It contains a `Mode` field that specifies how the path should be rewritten. // -// The function returns an error if it is impossible to rewrite the given relative path. +// The function returns the rewritten path if successful, or an error if the path could not be rewritten. +// The returned path is an empty string if the path was not rewritten. func (t *translateContext) rewritePath( + ctx context.Context, dir string, - p *string, - fn rewriteFunc, -) error { + input string, + opts translateOptions, +) (string, error) { // We assume absolute paths point to a location in the workspace - if path.IsAbs(*p) { - return nil + if path.IsAbs(input) { + return "", nil } - url, err := url.Parse(*p) + url, err := url.Parse(input) if err != nil { - return err + return "", err } // If the file path has scheme, it's a full path and we don't need to transform it if url.Scheme != "" { - return nil + return "", nil } // Local path is relative to the directory the resource was defined in. 
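A note on the containment check that follows: the old strings.HasPrefix(localRelPath, "..") test is replaced by filepath.IsLocal, which also rejects absolute paths, is not fooled by names that merely start with "..", and can now be bypassed via AllowPathOutsideSyncRoot. A small standalone illustration (Unix-style paths assumed):

    package main

    import (
        "fmt"
        "path/filepath"
    )

    func main() {
        // filepath.Rel gives the path of the resource relative to the sync root;
        // filepath.IsLocal reports whether that relative path stays inside the root.
        syncRoot := "/home/user/bundle"
        for _, local := range []string{
            "/home/user/bundle/src/nb.py",   // inside the sync root
            "/home/user/bundle/..data/x.py", // inside, despite the ".." in the name
            "/home/user/other/lib.whl",      // outside the sync root
        } {
            rel, err := filepath.Rel(syncRoot, local)
            if err != nil {
                fmt.Println(err)
                continue
            }
            fmt.Printf("%-32s rel=%-20s local=%v\n", local, rel, filepath.IsLocal(rel))
        }
    }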
- localPath := filepath.Join(dir, filepath.FromSlash(*p)) + localPath := filepath.Join(dir, input) if interp, ok := t.seen[localPath]; ok { - *p = interp - return nil + return interp, nil } // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { - return err + return "", err } - if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in sync root path", localPath) + if !opts.AllowPathOutsideSyncRoot && !filepath.IsLocal(localRelPath) { + return "", fmt.Errorf("path %s is not contained in sync root path", localPath) } - var workspacePath string - if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { - workspacePath = t.b.SyncRootPath - } else { - workspacePath = t.b.Config.Workspace.FilePath - } - remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath)) + // Normalize paths to separated by forward slashes. + localPath = filepath.ToSlash(localPath) + localRelPath = filepath.ToSlash(localRelPath) // Convert local path into workspace path via specified function. - interp, err := fn(*p, localPath, localRelPath, remotePath) + var interp string + switch opts.Mode { + case TranslateModeNotebook: + interp, err = t.translateNotebookPath(ctx, input, localPath, localRelPath) + case TranslateModeFile: + interp, err = t.translateFilePath(ctx, input, localPath, localRelPath) + case TranslateModeDirectory: + interp, err = t.translateDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteFile: + interp, err = t.translateLocalAbsoluteFilePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteDirectory: + interp, err = t.translateLocalAbsoluteDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelative: + interp, err = t.translateLocalRelativePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelativeWithPrefix: + interp, err = t.translateLocalRelativeWithPrefixPath(ctx, input, localPath, localRelPath) + default: + return "", fmt.Errorf("unsupported translate mode: %d", opts.Mode) + } if err != nil { - return err + return "", err } - *p = interp t.seen[localPath] = interp - return nil + return interp, nil } -func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { - if filepath.Ext(localFullPath) != notebook.ExtensionNone { + if path.Ext(localFullPath) != notebook.ExtensionNone { return "", fmt.Errorf("notebook %s not found", literal) } @@ -142,7 +202,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe // way we can provide a more targeted error message. for _, ext := range extensions { literalWithExt := literal + ext - localRelPathWithExt := filepath.ToSlash(localRelPath + ext) + localRelPathWithExt := localRelPath + ext if _, err := fs.Stat(t.b.SyncRoot, localRelPathWithExt); err == nil { return "", fmt.Errorf(`notebook %s not found. Did you mean %s? 
Local notebook references are expected to contain one of the following @@ -162,45 +222,42 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex } // Upon import, notebooks are stripped of their extension. - return strings.TrimSuffix(remotePath, filepath.Ext(localFullPath)), nil + localRelPathNoExt := strings.TrimSuffix(localRelPath, path.Ext(localRelPath)) + return path.Join(t.remoteRoot, localRelPathNoExt), nil } -func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", filepath.FromSlash(localFullPath), err) } if nb { return "", ErrIsNotebook{localFullPath} } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if err != nil { return "", err } if !info.IsDir() { - return "", fmt.Errorf("%s is not a directory", localFullPath) + return "", fmt.Errorf("%s is not a directory", filepath.FromSlash(localFullPath)) } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) { - return localRelPath, nil -} - -func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a file: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a file: %w", filepath.FromSlash(localFullPath), err) } if info.IsDir() { return "", fmt.Errorf("expected %s to be a file but found a directory", literal) @@ -208,16 +265,33 @@ func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, l return localFullPath, nil } -func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) { + info, err := os.Stat(filepath.FromSlash(localFullPath)) + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("directory %s not found", literal) + } + if err != nil { + return "", fmt.Errorf("unable to determine if %s is a directory: %w", 
filepath.FromSlash(localFullPath), err) + } + if !info.IsDir() { + return "", fmt.Errorf("expected %s to be a directory but found a file", literal) + } + return localFullPath, nil +} + +func (t *translateContext) translateLocalRelativePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + return localRelPath, nil +} + +func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { - localRelPath = "." + string(filepath.Separator) + localRelPath + localRelPath = "./" + localRelPath } return localRelPath, nil } -func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { - out := v.MustString() - err := t.rewritePath(dir, &out, fn) +func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.Value, dir string, opts translateOptions) (dyn.Value, error) { + out, err := t.rewritePath(ctx, dir, v.MustString(), opts) if err != nil { if target := (&ErrIsNotebook{}); errors.As(err, target) { return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target) @@ -228,42 +302,38 @@ func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, return dyn.InvalidValue, err } + // If the path was not rewritten, return the original value. + if out == "" { + return v, nil + } + return dyn.NewValue(out, v.Locations()), nil } -func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { - nv, err := t.rewriteValue(p, v, fn, dir) - if err == nil { - return nv, nil - } - - // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. - if fallback != "" { - nv, nerr := t.rewriteValue(p, v, fn, fallback) - if nerr == nil { - // TODO: Emit a warning that this path should be rewritten. - return nv, nil - } - } - - return dyn.InvalidValue, err -} - -func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *translatePaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { t := &translateContext{ b: b, seen: make(map[string]string), } + // Set the remote root to the sync root if source-linked deployment is enabled. + // Otherwise, set it to the workspace file path. + if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + t.remoteRoot = t.b.SyncRootPath + } else { + t.remoteRoot = t.b.Config.Workspace.FilePath + } + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error - for _, fn := range []func(dyn.Value) (dyn.Value, error){ + for _, fn := range []func(context.Context, dyn.Value) (dyn.Value, error){ t.applyJobTranslations, t.applyPipelineTranslations, t.applyArtifactTranslations, t.applyDashboardTranslations, + t.applyAppsTranslations, } { - v, err = fn(v) + v, err = fn(ctx, v) if err != nil { return dyn.InvalidValue, err } @@ -274,6 +344,8 @@ func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos return diag.FromErr(err) } +// gatherFallbackPaths collects the fallback paths for relative paths in the configuration. +// Read more about the motivation for this functionality in the "fallback" path translation tests. 
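With the per-kind rewrite functions folded into translate modes, the remote forms above differ only in how the slash-separated relative path is joined onto remoteRoot: notebooks lose their extension, files and directories are joined as-is, and the local modes never leave the local filesystem. A condensed standalone sketch of the three remote cases (remoteRoot stands for workspace.file_path, or the sync root for source-linked deployments):

    package main

    import (
        "fmt"
        "path"
        "strings"
    )

    type mode int

    const (
        notebook mode = iota
        file
        directory
    )

    // remotePath mirrors the join performed by the remote translate modes.
    // relPath must already be slash-separated and relative to the sync root.
    func remotePath(m mode, remoteRoot, relPath string) string {
        if m == notebook {
            // Notebooks are stripped of their extension on import.
            relPath = strings.TrimSuffix(relPath, path.Ext(relPath))
        }
        return path.Join(remoteRoot, relPath)
    }

    func main() {
        root := "/Workspace/Users/me/.bundle/dev/files"
        fmt.Println(remotePath(notebook, root, "src/nb.py"))   // .../files/src/nb
        fmt.Println(remotePath(file, root, "src/util.py"))     // .../files/src/util.py
        fmt.Println(remotePath(directory, root, "src/app"))    // .../files/src/app
    }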
func gatherFallbackPaths(v dyn.Value, typ string) (map[string]string, error) { fallback := make(map[string]string) pattern := dyn.NewPattern(dyn.Key("resources"), dyn.Key(typ), dyn.AnyKey()) diff --git a/bundle/config/mutator/translate_paths_apps.go b/bundle/config/mutator/translate_paths_apps.go new file mode 100644 index 000000000..6117ee43f --- /dev/null +++ b/bundle/config/mutator/translate_paths_apps.go @@ -0,0 +1,33 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/libs/dyn" +) + +func (t *translateContext) applyAppsTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { + // Convert the `source_code_path` field to a remote absolute path. + // We use this path for app deployment to point to the source code. + pattern := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("apps"), + dyn.AnyKey(), + dyn.Key("source_code_path"), + ) + + opts := translateOptions{ + Mode: TranslateModeDirectory, + } + + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + key := p[2].Key() + dir, err := v.Location().Directory() + if err != nil { + return dyn.InvalidValue, fmt.Errorf("unable to determine directory for app %s: %w", key, err) + } + + return t.rewriteValue(ctx, p, v, dir, opts) + }) +} diff --git a/bundle/config/mutator/translate_paths_apps_test.go b/bundle/config/mutator/translate_paths_apps_test.go new file mode 100644 index 000000000..5692934b8 --- /dev/null +++ b/bundle/config/mutator/translate_paths_apps_test.go @@ -0,0 +1,57 @@ +package mutator_test + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTranslatePathsApps_FilePathRelativeSubDirectory(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "src", "app", "app.py")) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/bundle/files", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "app": { + App: &apps.App{ + Name: "My App", + }, + SourceCodePath: "../src/app", + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.apps", []dyn.Location{{ + File: filepath.Join(dir, "resources/app.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the file path for the app has been converted to its local absolute path. 
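Traced by hand under the configuration above, the value checked by the assertion that closes this test falls out of three steps: resolve source_code_path against the directory of the defining file, re-express it relative to the sync root, then join it onto workspace.file_path because apps use the directory translate mode. A standalone sketch of that trace (paths are the test's own; helper names are illustrative):

    package main

    import (
        "fmt"
        "path"
        "path/filepath"
    )

    func main() {
        syncRoot := "/tmp/bundle"
        configDir := filepath.Join(syncRoot, "resources")         // directory of resources/app.yml
        local := filepath.Join(configDir, "../src/app")           // /tmp/bundle/src/app
        rel, _ := filepath.Rel(syncRoot, local)                   // src/app
        remote := path.Join("/bundle/files", filepath.ToSlash(rel))
        fmt.Println(remote) // /bundle/files/src/app
    }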
+ assert.Equal( + t, + "/bundle/files/src/app", + b.Config.Resources.Apps["app"].SourceCodePath, + ) +} diff --git a/bundle/config/mutator/translate_paths_artifacts.go b/bundle/config/mutator/translate_paths_artifacts.go index 921c00c73..8e864073f 100644 --- a/bundle/config/mutator/translate_paths_artifacts.go +++ b/bundle/config/mutator/translate_paths_artifacts.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type artifactRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { @@ -22,12 +23,18 @@ func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { return []artifactRewritePattern{ { base.Append(dyn.Key("path")), - t.translateNoOp, + translateOptions{ + Mode: TranslateModeLocalAbsoluteDirectory, + + // Artifact paths may be outside the sync root. + // They are the working directory for artifact builds. + AllowPathOutsideSyncRoot: true, + }, }, } } -func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyArtifactTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error for _, rewritePattern := range t.artifactRewritePatterns() { @@ -38,7 +45,7 @@ func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for artifact %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, "") + return t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_artifacts_test.go b/bundle/config/mutator/translate_paths_artifacts_test.go new file mode 100644 index 000000000..0d1af6156 --- /dev/null +++ b/bundle/config/mutator/translate_paths_artifacts_test.go @@ -0,0 +1,83 @@ +package mutator_test + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTranslatePathsArtifacts_InsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + dir := filepath.Join(tmp, "bundle") + lib := filepath.Join(dir, "my_lib") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir to the sync root. + Path: "../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. 
+ assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} + +func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + lib := filepath.Join(tmp, "my_lib") + dir := filepath.Join(tmp, "bundle") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir of the bundle root. + Path: "../../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} diff --git a/bundle/config/mutator/translate_paths_dashboards.go b/bundle/config/mutator/translate_paths_dashboards.go index 93822a599..18c4c12e2 100644 --- a/bundle/config/mutator/translate_paths_dashboards.go +++ b/bundle/config/mutator/translate_paths_dashboards.go @@ -1,12 +1,13 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyDashboardTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { // Convert the `file_path` field to a local absolute path. // We load the file at this path and use its contents for the dashboard contents. pattern := dyn.NewPattern( @@ -16,6 +17,10 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e dyn.Key("file_path"), ) + opts := translateOptions{ + Mode: TranslateModeLocalAbsoluteFile, + } + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() dir, err := v.Location().Directory() @@ -23,6 +28,6 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e return dyn.InvalidValue, fmt.Errorf("unable to determine directory for dashboard %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, t.retainLocalAbsoluteFilePath, dir, "") + return t.rewriteValue(ctx, p, v, dir, opts) }) } diff --git a/bundle/config/mutator/translate_paths_dashboards_test.go b/bundle/config/mutator/translate_paths_dashboards_test.go index 5e4e69f5d..02fba92e0 100644 --- a/bundle/config/mutator/translate_paths_dashboards_test.go +++ b/bundle/config/mutator/translate_paths_dashboards_test.go @@ -48,7 +48,7 @@ func TestTranslatePathsDashboards_FilePathRelativeSubDirectory(t *testing.T) { // Assert that the file path for the dashboard has been converted to its local absolute path. 
assert.Equal( t, - filepath.Join(dir, "src", "my_dashboard.lvdash.json"), + filepath.ToSlash(filepath.Join(dir, "src", "my_dashboard.lvdash.json")), b.Config.Resources.Dashboards["dashboard"].FilePath, ) } diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index c29ff0ea9..148ed4466 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "slices" @@ -9,7 +10,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyJobTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "jobs") @@ -38,28 +39,48 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) } - rewritePatternFn, err := t.getRewritePatternFn(kind) + mode, err := getJobTranslateMode(kind) if err != nil { return dyn.InvalidValue, err } - return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + opts := translateOptions{ + Mode: mode, + } + + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. + // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. 
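This try-then-fallback flow, now inlined here and in the pipelines translator instead of living in rewriteRelativeTo, is easiest to see in isolation. A simplified standalone sketch with a stand-in resolver in place of rewriteValue:

    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    // resolveExisting is a stand-in for rewriteValue: it succeeds only if the
    // candidate path exists on disk relative to the given directory.
    func resolveExisting(dir, rel string) (string, error) {
        p := filepath.Join(dir, rel)
        if _, err := os.Stat(p); err != nil {
            return "", err
        }
        return p, nil
    }

    // translateWithFallback tries the directory of the defining config file first,
    // then the fallback directory recorded for the job or pipeline.
    func translateWithFallback(dir, fallback, rel string) (string, error) {
        p, err := resolveExisting(dir, rel)
        if err == nil {
            return p, nil
        }
        if fallback != "" {
            if p, ferr := resolveExisting(fallback, rel); ferr == nil {
                // As in the mutator: this is where a warning about rewriting the path belongs.
                return p, nil
            }
        }
        return "", err // report the error from the primary attempt
    }

    func main() {
        p, err := translateWithFallback("/nonexistent/dir", os.TempDir(), ".")
        fmt.Println(p, err)
    }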
+ return nv, nil + } + } + + return dyn.InvalidValue, err }) } -func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) { +func getJobTranslateMode(kind paths.PathKind) (TranslateMode, error) { switch kind { case paths.PathKindLibrary: - return t.translateNoOp, nil + return TranslateModeLocalRelative, nil case paths.PathKindNotebook: - return t.translateNotebookPath, nil + return TranslateModeNotebook, nil case paths.PathKindWorkspaceFile: - return t.translateFilePath, nil + return TranslateModeFile, nil case paths.PathKindDirectory: - return t.translateDirectoryPath, nil + return TranslateModeDirectory, nil case paths.PathKindWithPrefix: - return t.translateNoOpWithPrefix, nil + return TranslateModeLocalRelativeWithPrefix, nil } - return nil, fmt.Errorf("unsupported path kind: %d", kind) + return TranslateMode(0), fmt.Errorf("unsupported path kind: %d", kind) } diff --git a/bundle/config/mutator/translate_paths_pipelines.go b/bundle/config/mutator/translate_paths_pipelines.go index 71a65e846..204808ff5 100644 --- a/bundle/config/mutator/translate_paths_pipelines.go +++ b/bundle/config/mutator/translate_paths_pipelines.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type pipelineRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { @@ -25,16 +26,16 @@ func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { return []pipelineRewritePattern{ { base.Append(dyn.Key("notebook"), dyn.Key("path")), - t.translateNotebookPath, + translateOptions{Mode: TranslateModeNotebook}, }, { base.Append(dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, + translateOptions{Mode: TranslateModeFile}, }, } } -func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyPipelineTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "pipelines") @@ -50,7 +51,23 @@ func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. + // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], rewritePattern.opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. 
+ return nv, nil + } + } + + return dyn.InvalidValue, err }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 493abb8c5..aa6488ab0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "github.com/databricks/cli/bundle" @@ -226,7 +225,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -251,7 +250,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, ) assert.Equal( @@ -362,7 +361,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) assert.Equal( t, - filepath.Join("job", "dist", "task.jar"), + "job/dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar, ) assert.Equal( @@ -774,8 +773,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) - assert.Equal(t, strings.Join([]string{".", "job", "dist", "env1.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) - assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) + assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) + assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) @@ -839,7 +838,7 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { assert.Equal( t, - filepath.Join("variables", "local", "whl.whl"), + "variables/local/whl.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } @@ -952,34 +951,34 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { // updated to source path assert.Equal( t, - filepath.Join(dir, "my_job_notebook"), + dir+"/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, ) assert.Equal( t, - filepath.Join(dir, "requirements.txt"), + dir+"/requirements.txt", b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, ) assert.Equal( t, - filepath.Join(dir, "my_pipeline_notebook"), + dir+"/my_pipeline_notebook", b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, ) // left as is assert.Equal( t, - filepath.Join("dist", "task.whl"), + 
"dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -989,7 +988,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, ) assert.Equal( diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 13cf0d462..1f523fed3 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -23,6 +23,7 @@ type Resources struct { Volumes map[string]*resources.Volume `json:"volumes,omitempty"` Clusters map[string]*resources.Cluster `json:"clusters,omitempty"` Dashboards map[string]*resources.Dashboard `json:"dashboards,omitempty"` + Apps map[string]*resources.App `json:"apps,omitempty"` } type ConfigResource interface { @@ -87,6 +88,7 @@ func (r *Resources) AllResources() []ResourceGroup { collectResourceMap(descriptions["clusters"], r.Clusters), collectResourceMap(descriptions["dashboards"], r.Dashboards), collectResourceMap(descriptions["volumes"], r.Volumes), + collectResourceMap(descriptions["apps"], r.Apps), } } @@ -97,12 +99,19 @@ func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) found = append(found, r.Jobs[k]) } } + for k := range r.Pipelines { if k == key { found = append(found, r.Pipelines[k]) } } + for k := range r.Apps { + if k == key { + found = append(found, r.Apps[k]) + } + } + if len(found) == 0 { return nil, fmt.Errorf("no such resource: %s", key) } @@ -126,76 +135,96 @@ type ResourceDescription struct { // Singular and plural title when used in summaries / terminal UI. SingularTitle string PluralTitle string + + TerraformResourceName string } // The keys of the map corresponds to the resource key in the bundle configuration. 
func SupportedResources() map[string]ResourceDescription { return map[string]ResourceDescription{ "jobs": { - SingularName: "job", - PluralName: "jobs", - SingularTitle: "Job", - PluralTitle: "Jobs", + SingularName: "job", + PluralName: "jobs", + SingularTitle: "Job", + PluralTitle: "Jobs", + TerraformResourceName: "databricks_job", }, "pipelines": { - SingularName: "pipeline", - PluralName: "pipelines", - SingularTitle: "Pipeline", - PluralTitle: "Pipelines", + SingularName: "pipeline", + PluralName: "pipelines", + SingularTitle: "Pipeline", + PluralTitle: "Pipelines", + TerraformResourceName: "databricks_pipeline", }, "models": { - SingularName: "model", - PluralName: "models", - SingularTitle: "Model", - PluralTitle: "Models", + SingularName: "model", + PluralName: "models", + SingularTitle: "Model", + PluralTitle: "Models", + TerraformResourceName: "databricks_mlflow_model", }, "experiments": { - SingularName: "experiment", - PluralName: "experiments", - SingularTitle: "Experiment", - PluralTitle: "Experiments", + SingularName: "experiment", + PluralName: "experiments", + SingularTitle: "Experiment", + PluralTitle: "Experiments", + TerraformResourceName: "databricks_mlflow_experiment", }, "model_serving_endpoints": { - SingularName: "model_serving_endpoint", - PluralName: "model_serving_endpoints", - SingularTitle: "Model Serving Endpoint", - PluralTitle: "Model Serving Endpoints", + SingularName: "model_serving_endpoint", + PluralName: "model_serving_endpoints", + SingularTitle: "Model Serving Endpoint", + PluralTitle: "Model Serving Endpoints", + TerraformResourceName: "databricks_model_serving_endpoint", }, "registered_models": { - SingularName: "registered_model", - PluralName: "registered_models", - SingularTitle: "Registered Model", - PluralTitle: "Registered Models", + SingularName: "registered_model", + PluralName: "registered_models", + SingularTitle: "Registered Model", + PluralTitle: "Registered Models", + TerraformResourceName: "databricks_registered_model", }, "quality_monitors": { - SingularName: "quality_monitor", - PluralName: "quality_monitors", - SingularTitle: "Quality Monitor", - PluralTitle: "Quality Monitors", + SingularName: "quality_monitor", + PluralName: "quality_monitors", + SingularTitle: "Quality Monitor", + PluralTitle: "Quality Monitors", + TerraformResourceName: "databricks_quality_monitor", }, "schemas": { - SingularName: "schema", - PluralName: "schemas", - SingularTitle: "Schema", - PluralTitle: "Schemas", + SingularName: "schema", + PluralName: "schemas", + SingularTitle: "Schema", + PluralTitle: "Schemas", + TerraformResourceName: "databricks_schema", }, "clusters": { - SingularName: "cluster", - PluralName: "clusters", - SingularTitle: "Cluster", - PluralTitle: "Clusters", + SingularName: "cluster", + PluralName: "clusters", + SingularTitle: "Cluster", + PluralTitle: "Clusters", + TerraformResourceName: "databricks_cluster", }, "dashboards": { - SingularName: "dashboard", - PluralName: "dashboards", - SingularTitle: "Dashboard", - PluralTitle: "Dashboards", + SingularName: "dashboard", + PluralName: "dashboards", + SingularTitle: "Dashboard", + PluralTitle: "Dashboards", + TerraformResourceName: "databricks_dashboard", }, "volumes": { - SingularName: "volume", - PluralName: "volumes", - SingularTitle: "Volume", - PluralTitle: "Volumes", + SingularName: "volume", + PluralName: "volumes", + SingularTitle: "Volume", + PluralTitle: "Volumes", + TerraformResourceName: "databricks_volume", + }, + "apps": { + SingularName: "app", + PluralName: "apps", + 
SingularTitle: "App", + PluralTitle: "Apps", + TerraformResourceName: "databricks_app", }, } } diff --git a/bundle/config/resources/apps.go b/bundle/config/resources/apps.go new file mode 100644 index 000000000..809e04896 --- /dev/null +++ b/bundle/config/resources/apps.go @@ -0,0 +1,70 @@ +package resources + +import ( + "context" + "net/url" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +type App struct { + // SourceCodePath is a required field used by DABs to point to Databricks app source code + // on local disk and to the corresponding workspace path during app deployment. + SourceCodePath string `json:"source_code_path"` + + // Config is an optional field which allows configuring the app following Databricks app configuration format like in app.yml. + // When this field is set, DABs read the configuration set in this field and write + // it to app.yml in the root of the source code folder in Databricks workspace. + // If there’s app.yml defined locally, DABs will raise an error. + Config map[string]any `json:"config,omitempty"` + + Permissions []Permission `json:"permissions,omitempty"` + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` + URL string `json:"url,omitempty" bundle:"internal"` + + *apps.App +} + +func (a *App) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, a) +} + +func (a App) MarshalJSON() ([]byte, error) { + return marshal.Marshal(a) +} + +func (a *App) Exists(ctx context.Context, w *databricks.WorkspaceClient, name string) (bool, error) { + _, err := w.Apps.GetByName(ctx, name) + if err != nil { + log.Debugf(ctx, "app %s does not exist", name) + return false, err + } + return true, nil +} + +func (a *App) TerraformResourceName() string { + return "databricks_app" +} + +func (a *App) InitializeURL(baseURL url.URL) { + if a.ModifiedStatus == "" || a.ModifiedStatus == ModifiedStatusCreated { + return + } + baseURL.Path = "apps/" + a.Name + a.URL = baseURL.String() +} + +func (a *App) GetName() string { + return a.Name +} + +func (a *App) GetURL() string { + return a.URL +} + +func (a *App) IsNil() bool { + return a.App == nil +} diff --git a/bundle/config/resources/clusters.go b/bundle/config/resources/clusters.go index ba991e865..073f40a79 100644 --- a/bundle/config/resources/clusters.go +++ b/bundle/config/resources/clusters.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "github.com/databricks/cli/libs/log" @@ -45,7 +44,7 @@ func (s *Cluster) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("compute/clusters/%s", s.ID) + baseURL.Path = "compute/clusters/" + s.ID s.URL = baseURL.String() } diff --git a/bundle/config/resources/job.go b/bundle/config/resources/job.go index 0aa41b2e8..76de78439 100644 --- a/bundle/config/resources/job.go +++ b/bundle/config/resources/job.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "strconv" @@ -52,7 +51,7 @@ func (j *Job) InitializeURL(baseURL url.URL) { if j.ID == "" { return } - baseURL.Path = fmt.Sprintf("jobs/%s", j.ID) + baseURL.Path = "jobs/" + j.ID j.URL = baseURL.String() } diff --git a/bundle/config/resources/mlflow_experiment.go b/bundle/config/resources/mlflow_experiment.go index 5d179ec0f..ea18ce114 100644 --- a/bundle/config/resources/mlflow_experiment.go +++ b/bundle/config/resources/mlflow_experiment.go @@ -2,7 +2,6 @@ package 
resources import ( "context" - "fmt" "net/url" "github.com/databricks/cli/libs/log" @@ -47,7 +46,7 @@ func (s *MlflowExperiment) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("ml/experiments/%s", s.ID) + baseURL.Path = "ml/experiments/" + s.ID s.URL = baseURL.String() } diff --git a/bundle/config/resources/mlflow_model.go b/bundle/config/resources/mlflow_model.go index 72376f45d..69ae2d438 100644 --- a/bundle/config/resources/mlflow_model.go +++ b/bundle/config/resources/mlflow_model.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "github.com/databricks/cli/libs/log" @@ -47,7 +46,7 @@ func (s *MlflowModel) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("ml/models/%s", s.ID) + baseURL.Path = "ml/models/" + s.ID s.URL = baseURL.String() } diff --git a/bundle/config/resources/model_serving_endpoint.go b/bundle/config/resources/model_serving_endpoint.go index a3c472b3f..8b1394d86 100644 --- a/bundle/config/resources/model_serving_endpoint.go +++ b/bundle/config/resources/model_serving_endpoint.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "github.com/databricks/cli/libs/log" @@ -55,7 +54,7 @@ func (s *ModelServingEndpoint) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("ml/endpoints/%s", s.ID) + baseURL.Path = "ml/endpoints/" + s.ID s.URL = baseURL.String() } diff --git a/bundle/config/resources/permission.go b/bundle/config/resources/permission.go index 62e18a09e..fa1568601 100644 --- a/bundle/config/resources/permission.go +++ b/bundle/config/resources/permission.go @@ -25,5 +25,5 @@ func (p Permission) String() string { return fmt.Sprintf("level: %s, group_name: %s", p.Level, p.GroupName) } - return fmt.Sprintf("level: %s", p.Level) + return "level: " + p.Level } diff --git a/bundle/config/resources/pipeline.go b/bundle/config/resources/pipeline.go index eaa4c5368..5127d07ba 100644 --- a/bundle/config/resources/pipeline.go +++ b/bundle/config/resources/pipeline.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "github.com/databricks/cli/libs/log" @@ -47,7 +46,7 @@ func (p *Pipeline) InitializeURL(baseURL url.URL) { if p.ID == "" { return } - baseURL.Path = fmt.Sprintf("pipelines/%s", p.ID) + baseURL.Path = "pipelines/" + p.ID p.URL = baseURL.String() } diff --git a/bundle/config/resources/quality_monitor.go b/bundle/config/resources/quality_monitor.go index b1d7e08a5..88bc0a3e7 100644 --- a/bundle/config/resources/quality_monitor.go +++ b/bundle/config/resources/quality_monitor.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "strings" @@ -51,7 +50,7 @@ func (s *QualityMonitor) InitializeURL(baseURL url.URL) { if s.TableName == "" { return } - baseURL.Path = fmt.Sprintf("explore/data/%s", strings.ReplaceAll(s.TableName, ".", "/")) + baseURL.Path = "explore/data/" + strings.ReplaceAll(s.TableName, ".", "/") s.URL = baseURL.String() } diff --git a/bundle/config/resources/registered_model.go b/bundle/config/resources/registered_model.go index 8513a79ae..006eef773 100644 --- a/bundle/config/resources/registered_model.go +++ b/bundle/config/resources/registered_model.go @@ -2,7 +2,6 @@ package resources import ( "context" - "fmt" "net/url" "strings" @@ -57,7 +56,7 @@ func (s *RegisteredModel) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("explore/data/models/%s", strings.ReplaceAll(s.ID, ".", "/")) + baseURL.Path = 
"explore/data/models/" + strings.ReplaceAll(s.ID, ".", "/") s.URL = baseURL.String() } diff --git a/bundle/config/resources/schema.go b/bundle/config/resources/schema.go index 8eadd7e46..b638907ac 100644 --- a/bundle/config/resources/schema.go +++ b/bundle/config/resources/schema.go @@ -2,7 +2,7 @@ package resources import ( "context" - "fmt" + "errors" "net/url" "strings" @@ -26,7 +26,7 @@ type Schema struct { } func (s *Schema) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { - return false, fmt.Errorf("schema.Exists() is not supported") + return false, errors.New("schema.Exists() is not supported") } func (s *Schema) TerraformResourceName() string { @@ -37,7 +37,7 @@ func (s *Schema) InitializeURL(baseURL url.URL) { if s.ID == "" { return } - baseURL.Path = fmt.Sprintf("explore/data/%s", strings.ReplaceAll(s.ID, ".", "/")) + baseURL.Path = "explore/data/" + strings.ReplaceAll(s.ID, ".", "/") s.URL = baseURL.String() } diff --git a/bundle/config/resources/volume.go b/bundle/config/resources/volume.go index cae2a3463..882b7107d 100644 --- a/bundle/config/resources/volume.go +++ b/bundle/config/resources/volume.go @@ -2,7 +2,7 @@ package resources import ( "context" - "fmt" + "errors" "net/url" "strings" @@ -34,7 +34,7 @@ func (v Volume) MarshalJSON() ([]byte, error) { } func (v *Volume) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { - return false, fmt.Errorf("volume.Exists() is not supported") + return false, errors.New("volume.Exists() is not supported") } func (v *Volume) TerraformResourceName() string { @@ -45,7 +45,7 @@ func (v *Volume) InitializeURL(baseURL url.URL) { if v.ID == "" { return } - baseURL.Path = fmt.Sprintf("explore/data/volumes/%s", strings.ReplaceAll(v.ID, ".", "/")) + baseURL.Path = "explore/data/volumes/" + strings.ReplaceAll(v.ID, ".", "/") v.URL = baseURL.String() } diff --git a/bundle/config/resources_test.go b/bundle/config/resources_test.go index 2d05acf3e..cbbcf5e27 100644 --- a/bundle/config/resources_test.go +++ b/bundle/config/resources_test.go @@ -33,15 +33,15 @@ func TestCustomMarshallerIsImplemented(t *testing.T) { r := Resources{} rt := reflect.TypeOf(r) - for i := 0; i < rt.NumField(); i++ { + for i := range rt.NumField() { field := rt.Field(i) // Fields in Resources are expected be of the form map[string]*resourceStruct - assert.Equal(t, field.Type.Kind(), reflect.Map, "Resource %s is not a map", field.Name) + assert.Equal(t, reflect.Map, field.Type.Kind(), "Resource %s is not a map", field.Name) kt := field.Type.Key() - assert.Equal(t, kt.Kind(), reflect.String, "Resource %s is not a map with string keys", field.Name) + assert.Equal(t, reflect.String, kt.Kind(), "Resource %s is not a map with string keys", field.Name) vt := field.Type.Elem() - assert.Equal(t, vt.Kind(), reflect.Ptr, "Resource %s is not a map with pointer values", field.Name) + assert.Equal(t, reflect.Ptr, vt.Kind(), "Resource %s is not a map with pointer values", field.Name) // Marshalling a resourceStruct will panic if resourceStruct does not have a custom marshaller // This is because resourceStruct embeds a Go SDK struct that implements @@ -75,7 +75,7 @@ func TestResourcesAllResourcesCompleteness(t *testing.T) { types = append(types, group.Description.PluralName) } - for i := 0; i < rt.NumField(); i++ { + for i := range rt.NumField() { field := rt.Field(i) jsonTag := field.Tag.Get("json") @@ -92,7 +92,7 @@ func TestSupportedResources(t *testing.T) { actual := SupportedResources() typ := 
reflect.TypeOf(Resources{}) - for i := 0; i < typ.NumField(); i++ { + for i := range typ.NumField() { field := typ.Field(i) jsonTags := strings.Split(field.Tag.Get("json"), ",") pluralName := jsonTags[0] diff --git a/bundle/config/root.go b/bundle/config/root.go index 4b1467456..b974bcec5 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -47,8 +47,8 @@ type Root struct { // Targets can be used to differentiate settings and resources between // bundle deployment targets (e.g. development, staging, production). - // If not specified, the code below initializes this field with a - // single default-initialized target called "default". + // Note that this field is set to 'nil' by the SelectTarget mutator; + // use bundle.Bundle.Target to access the selected target configuration. Targets map[string]*Target `json:"targets,omitempty"` // DEPRECATED. Left for backward compatibility with Targets @@ -102,7 +102,8 @@ func LoadFromBytes(path string, raw []byte) (*Root, diag.Diagnostics) { // Convert normalized configuration tree to typed configuration. err = r.updateWithDynamicValue(v) if err != nil { - return nil, diag.Errorf("failed to load %s: %v", path, err) + diags = diags.Extend(diag.Errorf("failed to load %s: %v", path, err)) + return nil, diags } return &r, diags } @@ -387,14 +388,6 @@ func (r *Root) MergeTargetOverrides(name string) error { return err } - // If the branch was overridden, we need to clear the inferred flag. - if branch := v.Get("branch"); branch.Kind() != dyn.KindInvalid { - out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.V(false)) - if err != nil { - return err - } - } - // Set the merged value. root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git")), out) if err != nil { diff --git a/bundle/config/validate/fast_validate.go b/bundle/config/validate/fast_validate.go new file mode 100644 index 000000000..47d83036d --- /dev/null +++ b/bundle/config/validate/fast_validate.go @@ -0,0 +1,51 @@ +package validate + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +// FastValidate runs a subset of fast validation checks. This is a subset of the full +// suite of validation mutators that satisfy ANY ONE of the following criteria: +// +// 1. No file i/o or network requests are made in the mutator. +// 2. The validation is blocking for bundle deployments. +// +// The full suite of validation mutators is available in the [Validate] mutator. +type fastValidateReadonly struct{} + +func FastValidateReadonly() bundle.ReadOnlyMutator { + return &fastValidateReadonly{} +} + +func (f *fastValidateReadonly) Name() string { + return "fast_validate(readonly)" +} + +func (f *fastValidateReadonly) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + return bundle.ApplyReadOnly(ctx, rb, bundle.Parallel( + // Fast mutators with only in-memory checks + JobClusterKeyDefined(), + JobTaskClusterSpec(), + SingleNodeCluster(), + + // Blocking mutators. Deployments will fail if these checks fail. 
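FastValidate groups the cheap in-memory checks together with blocking ones such as the ValidateArtifactPath entry listed next, and hands them to bundle.Parallel. The essence of that pattern, fanning read-only checks out over goroutines and merging their diagnostics, can be sketched standalone (this is not the actual bundle.Parallel implementation; diagnostics are reduced to strings):

    package main

    import (
        "fmt"
        "sync"
    )

    type check func() []string // returns diagnostics; empty when the check passes

    // runParallel fans the checks out over goroutines and merges their output.
    // The order of the merged diagnostics is not guaranteed.
    func runParallel(checks ...check) []string {
        var (
            mu    sync.Mutex
            wg    sync.WaitGroup
            diags []string
        )
        for _, c := range checks {
            wg.Add(1)
            go func(c check) {
                defer wg.Done()
                out := c()
                mu.Lock()
                diags = append(diags, out...)
                mu.Unlock()
            }(c)
        }
        wg.Wait()
        return diags
    }

    func main() {
        diags := runParallel(
            func() []string { return nil },                                  // e.g. a passing JobClusterKeyDefined-style check
            func() []string { return []string{"artifact_path is invalid"} }, // e.g. a failing blocking check
        )
        fmt.Println(diags)
    }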
+ ValidateArtifactPath(), + )) +} + +type fastValidate struct{} + +func FastValidate() bundle.Mutator { + return &fastValidate{} +} + +func (f *fastValidate) Name() string { + return "fast_validate" +} + +func (f *fastValidate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), FastValidateReadonly()) +} diff --git a/bundle/config/validate/files_to_sync_test.go b/bundle/config/validate/files_to_sync_test.go index d6a1ed59a..dd40295c3 100644 --- a/bundle/config/validate/files_to_sync_test.go +++ b/bundle/config/validate/files_to_sync_test.go @@ -87,7 +87,7 @@ func TestFilesToSync_EverythingIgnored(t *testing.T) { ctx := context.Background() rb := bundle.ReadOnly(b) diags := bundle.ApplyReadOnly(ctx, rb, FilesToSync()) - require.Equal(t, 1, len(diags)) + require.Len(t, diags, 1) assert.Equal(t, diag.Warning, diags[0].Severity) assert.Equal(t, "There are no files to sync, please check your .gitignore", diags[0].Summary) } @@ -101,7 +101,7 @@ func TestFilesToSync_EverythingExcluded(t *testing.T) { ctx := context.Background() rb := bundle.ReadOnly(b) diags := bundle.ApplyReadOnly(ctx, rb, FilesToSync()) - require.Equal(t, 1, len(diags)) + require.Len(t, diags, 1) assert.Equal(t, diag.Warning, diags[0].Severity) assert.Equal(t, "There are no files to sync, please check your .gitignore and sync.exclude configuration", diags[0].Summary) } diff --git a/bundle/config/validate/folder_permissions.go b/bundle/config/validate/folder_permissions.go index aa89a0551..7b12b4b16 100644 --- a/bundle/config/validate/folder_permissions.go +++ b/bundle/config/validate/folder_permissions.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "path" + "strconv" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/libraries" @@ -36,7 +37,8 @@ func (f *folderPermissions) Apply(ctx context.Context, b bundle.ReadOnlyBundle) } if err := g.Wait(); err != nil { - return diag.FromErr(err) + // Note, only diag from first coroutine is captured, others are lost + diags = diags.Extend(diag.FromErr(err)) } for _, r := range results { @@ -59,7 +61,7 @@ func checkFolderPermission(ctx context.Context, b bundle.ReadOnlyBundle, folderP } objPermissions, err := w.GetPermissions(ctx, workspace.GetWorkspaceObjectPermissionsRequest{ - WorkspaceObjectId: fmt.Sprint(obj.ObjectId), + WorkspaceObjectId: strconv.FormatInt(obj.ObjectId, 10), WorkspaceObjectType: "directories", }) if err != nil { diff --git a/bundle/config/validate/job_cluster_key_defined_test.go b/bundle/config/validate/job_cluster_key_defined_test.go index 176b0fedc..2cbdb7c6a 100644 --- a/bundle/config/validate/job_cluster_key_defined_test.go +++ b/bundle/config/validate/job_cluster_key_defined_test.go @@ -34,7 +34,7 @@ func TestJobClusterKeyDefined(t *testing.T) { } diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined()) - require.Len(t, diags, 0) + require.Empty(t, diags) require.NoError(t, diags.Error()) } @@ -59,8 +59,8 @@ func TestJobClusterKeyNotDefined(t *testing.T) { diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined()) require.Len(t, diags, 1) require.NoError(t, diags.Error()) - require.Equal(t, diags[0].Severity, diag.Warning) - require.Equal(t, diags[0].Summary, "job_cluster_key do-not-exist is not defined") + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "job_cluster_key do-not-exist is not defined", diags[0].Summary) } func TestJobClusterKeyDefinedInDifferentJob(t *testing.T) { @@ 
-92,6 +92,6 @@ func TestJobClusterKeyDefinedInDifferentJob(t *testing.T) { diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobClusterKeyDefined()) require.Len(t, diags, 1) require.NoError(t, diags.Error()) - require.Equal(t, diags[0].Severity, diag.Warning) - require.Equal(t, diags[0].Summary, "job_cluster_key do-not-exist is not defined") + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "job_cluster_key do-not-exist is not defined", diags[0].Summary) } diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go index 50295375b..d80c5d632 100644 --- a/bundle/config/validate/unique_resource_keys.go +++ b/bundle/config/validate/unique_resource_keys.go @@ -2,7 +2,6 @@ package validate import ( "context" - "fmt" "sort" "github.com/databricks/cli/bundle" @@ -102,7 +101,7 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.D // If there are multiple resources with the same key, report an error. diags = append(diags, diag.Diagnostic{ Severity: diag.Error, - Summary: fmt.Sprintf("multiple resources have been defined with the same key: %s", k), + Summary: "multiple resources have been defined with the same key: " + k, Locations: v.locations, Paths: v.paths, }) diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index 131566fc9..8fdd704ab 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -30,12 +30,13 @@ func (l location) Path() dyn.Path { // Apply implements bundle.Mutator. func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel( - JobClusterKeyDefined(), + FastValidateReadonly(), + + // Slow mutators that require network or file i/o. These are only + // run in the `bundle validate` command. FilesToSync(), - ValidateSyncPatterns(), - JobTaskClusterSpec(), ValidateFolderPermissions(), - SingleNodeCluster(), + ValidateSyncPatterns(), )) } diff --git a/bundle/config/validate/validate_artifact_path.go b/bundle/config/validate/validate_artifact_path.go new file mode 100644 index 000000000..aa4492670 --- /dev/null +++ b/bundle/config/validate/validate_artifact_path.go @@ -0,0 +1,129 @@ +package validate + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" + "github.com/databricks/databricks-sdk-go/apierr" +) + +type validateArtifactPath struct{} + +func ValidateArtifactPath() bundle.ReadOnlyMutator { + return &validateArtifactPath{} +} + +func (v *validateArtifactPath) Name() string { + return "validate:artifact_paths" +} + +func extractVolumeFromPath(artifactPath string) (string, string, string, error) { + if !libraries.IsVolumesPath(artifactPath) { + return "", "", "", fmt.Errorf("expected artifact_path to start with /Volumes/, got %s", artifactPath) + } + + parts := strings.Split(artifactPath, "/") + volumeFormatErr := fmt.Errorf("expected UC volume path to be in the format /Volumes////..., got %s", artifactPath) + + // Incorrect format. + if len(parts) < 5 { + return "", "", "", volumeFormatErr + } + + catalogName := parts[2] + schemaName := parts[3] + volumeName := parts[4] + + // Incorrect format. 
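// For example, "/Volumes/main/my_schema/my_volume/abc" splits into
// ["", "Volumes", "main", "my_schema", "my_volume", "abc"], so parts[2],
// parts[3] and parts[4] carry the catalog, schema and volume names.
// A path like "/Volumes//my_schema/my_volume" leaves the catalog segment
// empty and is rejected by the check below.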
+ if catalogName == "" || schemaName == "" || volumeName == "" { + return "", "", "", volumeFormatErr + } + + return catalogName, schemaName, volumeName, nil +} + +func findVolumeInBundle(r config.Root, catalogName, schemaName, volumeName string) (dyn.Path, []dyn.Location, bool) { + volumes := r.Resources.Volumes + for k, v := range volumes { + if v.CatalogName != catalogName || v.Name != volumeName { + continue + } + // UC schemas can be defined in the bundle itself, and thus might be interpolated + // at runtime via the ${resources.schemas.} syntax. Thus we match the volume + // definition if the schema name is the same as the one in the bundle, or if the + // schema name is interpolated. + // We only have to check for ${resources.schemas...} references because any + // other valid reference (like ${var.foo}) would have been interpolated by this point. + p, ok := dynvar.PureReferenceToPath(v.SchemaName) + isSchemaDefinedInBundle := ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")}) + if v.SchemaName != schemaName && !isSchemaDefinedInBundle { + continue + } + pathString := "resources.volumes." + k + return dyn.MustPathFromString(pathString), r.GetLocations(pathString), true + } + return nil, nil, false +} + +func (v *validateArtifactPath) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + // We only validate UC Volumes paths right now. + if !libraries.IsVolumesPath(rb.Config().Workspace.ArtifactPath) { + return nil + } + + wrapErrorMsg := func(s string) diag.Diagnostics { + return diag.Diagnostics{ + { + Summary: s, + Severity: diag.Error, + Locations: rb.Config().GetLocations("workspace.artifact_path"), + Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, + }, + } + } + + catalogName, schemaName, volumeName, err := extractVolumeFromPath(rb.Config().Workspace.ArtifactPath) + if err != nil { + return wrapErrorMsg(err.Error()) + } + volumeFullName := fmt.Sprintf("%s.%s.%s", catalogName, schemaName, volumeName) + w := rb.WorkspaceClient() + _, err = w.Volumes.ReadByName(ctx, volumeFullName) + + if errors.Is(err, apierr.ErrPermissionDenied) { + return wrapErrorMsg(fmt.Sprintf("cannot access volume %s: %s", volumeFullName, err)) + } + if errors.Is(err, apierr.ErrNotFound) { + path, locations, ok := findVolumeInBundle(rb.Config(), catalogName, schemaName, volumeName) + if !ok { + return wrapErrorMsg(fmt.Sprintf("volume %s does not exist", volumeFullName)) + } + + // If the volume is defined in the bundle, provide a more helpful error diagnostic, + // with more details and location information. + return diag.Diagnostics{{ + Summary: fmt.Sprintf("volume %s does not exist", volumeFullName), + Severity: diag.Error, + Detail: `You are using a volume in your artifact_path that is managed by +this bundle but which has not been deployed yet. 
Please first deploy +the volume using 'bundle deploy' and then switch over to using it in +the artifact_path.`, + Locations: slices.Concat(rb.Config().GetLocations("workspace.artifact_path"), locations), + Paths: append([]dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, path), + }} + + } + if err != nil { + return wrapErrorMsg(fmt.Sprintf("cannot read volume %s: %s", volumeFullName, err)) + } + return nil +} diff --git a/bundle/config/validate/validate_artifact_path_test.go b/bundle/config/validate/validate_artifact_path_test.go new file mode 100644 index 000000000..e1ae6af34 --- /dev/null +++ b/bundle/config/validate/validate_artifact_path_test.go @@ -0,0 +1,243 @@ +package validate + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestValidateArtifactPathWithVolumeInBundle(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + ArtifactPath: "/Volumes/catalogN/schemaN/volumeN/abc", + }, + Resources: config.Resources{ + Volumes: map[string]*resources.Volume{ + "foo": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogN", + Name: "volumeN", + SchemaName: "schemaN", + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "file", Line: 1, Column: 1}}) + bundletest.SetLocation(b, "resources.volumes.foo", []dyn.Location{{File: "file", Line: 2, Column: 2}}) + + ctx := context.Background() + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockVolumesAPI() + api.EXPECT().ReadByName(mock.Anything, "catalogN.schemaN.volumeN").Return(nil, &apierr.APIError{ + StatusCode: 404, + }) + b.SetWorkpaceClient(m.WorkspaceClient) + + diags := bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), ValidateArtifactPath()) + assert.Equal(t, diag.Diagnostics{{ + Severity: diag.Error, + Summary: "volume catalogN.schemaN.volumeN does not exist", + Locations: []dyn.Location{ + {File: "file", Line: 1, Column: 1}, + {File: "file", Line: 2, Column: 2}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("workspace.artifact_path"), + dyn.MustPathFromString("resources.volumes.foo"), + }, + Detail: `You are using a volume in your artifact_path that is managed by +this bundle but which has not been deployed yet. 
Please first deploy +the volume using 'bundle deploy' and then switch over to using it in +the artifact_path.`, + }}, diags) +} + +func TestValidateArtifactPath(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + ArtifactPath: "/Volumes/catalogN/schemaN/volumeN/abc", + }, + }, + } + + bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "file", Line: 1, Column: 1}}) + assertDiags := func(t *testing.T, diags diag.Diagnostics, expected string) { + assert.Len(t, diags, 1) + assert.Equal(t, diag.Diagnostics{{ + Severity: diag.Error, + Summary: expected, + Locations: []dyn.Location{{File: "file", Line: 1, Column: 1}}, + Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, + }}, diags) + } + + rb := bundle.ReadOnly(b) + ctx := context.Background() + + tcases := []struct { + err error + expectedSummary string + }{ + { + err: &apierr.APIError{ + StatusCode: 403, + Message: "User does not have USE SCHEMA on Schema 'catalogN.schemaN'", + }, + expectedSummary: "cannot access volume catalogN.schemaN.volumeN: User does not have USE SCHEMA on Schema 'catalogN.schemaN'", + }, + { + err: &apierr.APIError{ + StatusCode: 404, + }, + expectedSummary: "volume catalogN.schemaN.volumeN does not exist", + }, + { + err: &apierr.APIError{ + StatusCode: 500, + Message: "Internal Server Error", + }, + expectedSummary: "cannot read volume catalogN.schemaN.volumeN: Internal Server Error", + }, + } + + for _, tc := range tcases { + m := mocks.NewMockWorkspaceClient(t) + api := m.GetMockVolumesAPI() + api.EXPECT().ReadByName(mock.Anything, "catalogN.schemaN.volumeN").Return(nil, tc.err) + b.SetWorkpaceClient(m.WorkspaceClient) + + diags := bundle.ApplyReadOnly(ctx, rb, ValidateArtifactPath()) + assertDiags(t, diags, tc.expectedSummary) + } +} + +func invalidVolumePaths() []string { + return []string{ + "/Volumes/", + "/Volumes/main", + "/Volumes/main/", + "/Volumes/main//", + "/Volumes/main//my_schema", + "/Volumes/main/my_schema", + "/Volumes/main/my_schema/", + "/Volumes/main/my_schema//", + "/Volumes//my_schema/my_volume", + } +} + +func TestExtractVolumeFromPath(t *testing.T) { + catalogName, schemaName, volumeName, err := extractVolumeFromPath("/Volumes/main/my_schema/my_volume") + require.NoError(t, err) + assert.Equal(t, "main", catalogName) + assert.Equal(t, "my_schema", schemaName) + assert.Equal(t, "my_volume", volumeName) + + for _, p := range invalidVolumePaths() { + _, _, _, err := extractVolumeFromPath(p) + assert.EqualError(t, err, "expected UC volume path to be in the format /Volumes////..., got "+p) + } +} + +func TestValidateArtifactPathWithInvalidPaths(t *testing.T) { + for _, p := range invalidVolumePaths() { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + ArtifactPath: p, + }, + }, + } + + bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}) + + diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), ValidateArtifactPath()) + require.Equal(t, diag.Diagnostics{{ + Severity: diag.Error, + Summary: "expected UC volume path to be in the format /Volumes////..., got " + p, + Locations: []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}, + Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, + }}, diags) + } +} + +func TestFindVolumeInBundle(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Volumes: map[string]*resources.Volume{ + "foo": { + 
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "main", + Name: "my_volume", + SchemaName: "my_schema", + }, + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.volumes.foo", []dyn.Location{ + { + File: "volume.yml", + Line: 1, + Column: 2, + }, + }) + + // volume is in DAB. + path, locations, ok := findVolumeInBundle(b.Config, "main", "my_schema", "my_volume") + assert.True(t, ok) + assert.Equal(t, []dyn.Location{{ + File: "volume.yml", + Line: 1, + Column: 2, + }}, locations) + assert.Equal(t, dyn.MustPathFromString("resources.volumes.foo"), path) + + // wrong volume name + _, _, ok = findVolumeInBundle(b.Config, "main", "my_schema", "doesnotexist") + assert.False(t, ok) + + // wrong schema name + _, _, ok = findVolumeInBundle(b.Config, "main", "doesnotexist", "my_volume") + assert.False(t, ok) + + // wrong catalog name + _, _, ok = findVolumeInBundle(b.Config, "doesnotexist", "my_schema", "my_volume") + assert.False(t, ok) + + // schema name is interpolated but does not have the right prefix. In this case + // we should not match the volume. + b.Config.Resources.Volumes["foo"].SchemaName = "${foo.bar.baz}" + _, _, ok = findVolumeInBundle(b.Config, "main", "my_schema", "my_volume") + assert.False(t, ok) + + // schema name is interpolated. + b.Config.Resources.Volumes["foo"].SchemaName = "${resources.schemas.my_schema.name}" + path, locations, ok = findVolumeInBundle(b.Config, "main", "valuedoesnotmatter", "my_volume") + assert.True(t, ok) + assert.Equal(t, []dyn.Location{{ + File: "volume.yml", + Line: 1, + Column: 2, + }}, locations) + assert.Equal(t, dyn.MustPathFromString("resources.volumes.foo"), path) +} diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index f5787a81d..04acd28ab 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -47,15 +47,13 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di var errs errgroup.Group var diags diag.Diagnostics - for i, pattern := range patterns { - index := i - fullPattern := pattern + for index, pattern := range patterns { // If the pattern is negated, strip the negation prefix // and check if the pattern matches any files. 
// Negation in gitignore syntax means "don't look at this path' // So if p matches nothing it's useless negation, but if there are matches, // it means: do not include these files into result set - p := strings.TrimPrefix(fullPattern, "!") + p := strings.TrimPrefix(pattern, "!") errs.Go(func() error { fs, err := fileset.NewGlobSet(rb.BundleRoot(), []string{p}) if err != nil { @@ -72,7 +70,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di mu.Lock() diags = diags.Append(diag.Diagnostic{ Severity: diag.Warning, - Summary: fmt.Sprintf("Pattern %s does not match any files", fullPattern), + Summary: fmt.Sprintf("Pattern %s does not match any files", pattern), Locations: []dyn.Location{loc.Location()}, Paths: []dyn.Path{loc.Path()}, }) diff --git a/bundle/config/variable/lookup.go b/bundle/config/variable/lookup.go index 37e380f18..71c8512e3 100755 --- a/bundle/config/variable/lookup.go +++ b/bundle/config/variable/lookup.go @@ -2,7 +2,7 @@ package variable import ( "context" - "fmt" + "errors" "github.com/databricks/databricks-sdk-go" ) @@ -83,11 +83,11 @@ func (l *Lookup) constructResolver() (resolver, error) { switch len(resolvers) { case 0: - return nil, fmt.Errorf("no valid lookup fields provided") + return nil, errors.New("no valid lookup fields provided") case 1: return resolvers[0], nil default: - return nil, fmt.Errorf("exactly one lookup field must be provided") + return nil, errors.New("exactly one lookup field must be provided") } } diff --git a/bundle/config/variable/lookup_test.go b/bundle/config/variable/lookup_test.go index bd54d89fc..bcfcb4626 100644 --- a/bundle/config/variable/lookup_test.go +++ b/bundle/config/variable/lookup_test.go @@ -13,7 +13,7 @@ func TestLookup_Coverage(t *testing.T) { val := reflect.ValueOf(lookup) typ := val.Type() - for i := 0; i < val.NumField(); i++ { + for i := range val.NumField() { field := val.Field(i) if field.Kind() != reflect.String { t.Fatalf("Field %s is not a string", typ.Field(i).Name) diff --git a/bundle/config/variable/resolve_alert.go b/bundle/config/variable/resolve_alert.go index be83e81fa..507306aa0 100644 --- a/bundle/config/variable/resolve_alert.go +++ b/bundle/config/variable/resolve_alert.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveAlert) Resolve(ctx context.Context, w *databricks.WorkspaceClient if err != nil { return "", err } - return fmt.Sprint(entity.Id), nil + return entity.Id, nil } func (l resolveAlert) String() string { - return fmt.Sprintf("alert: %s", l.name) + return "alert: " + l.name } diff --git a/bundle/config/variable/resolve_cluster.go b/bundle/config/variable/resolve_cluster.go index a8cf3fe7f..51278aef5 100644 --- a/bundle/config/variable/resolve_cluster.go +++ b/bundle/config/variable/resolve_cluster.go @@ -42,5 +42,5 @@ func (l resolveCluster) Resolve(ctx context.Context, w *databricks.WorkspaceClie } func (l resolveCluster) String() string { - return fmt.Sprintf("cluster: %s", l.name) + return "cluster: " + l.name } diff --git a/bundle/config/variable/resolve_cluster_policy.go b/bundle/config/variable/resolve_cluster_policy.go index b19380a63..94fd892b2 100644 --- a/bundle/config/variable/resolve_cluster_policy.go +++ b/bundle/config/variable/resolve_cluster_policy.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveClusterPolicy) Resolve(ctx context.Context, w *databricks.Workspa if err 
!= nil { return "", err } - return fmt.Sprint(entity.PolicyId), nil + return entity.PolicyId, nil } func (l resolveClusterPolicy) String() string { - return fmt.Sprintf("cluster-policy: %s", l.name) + return "cluster-policy: " + l.name } diff --git a/bundle/config/variable/resolve_dashboard.go b/bundle/config/variable/resolve_dashboard.go index 44fd45197..2979716ce 100644 --- a/bundle/config/variable/resolve_dashboard.go +++ b/bundle/config/variable/resolve_dashboard.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveDashboard) Resolve(ctx context.Context, w *databricks.WorkspaceCl if err != nil { return "", err } - return fmt.Sprint(entity.Id), nil + return entity.Id, nil } func (l resolveDashboard) String() string { - return fmt.Sprintf("dashboard: %s", l.name) + return "dashboard: " + l.name } diff --git a/bundle/config/variable/resolve_instance_pool.go b/bundle/config/variable/resolve_instance_pool.go index cbf0775c9..600b47a50 100644 --- a/bundle/config/variable/resolve_instance_pool.go +++ b/bundle/config/variable/resolve_instance_pool.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveInstancePool) Resolve(ctx context.Context, w *databricks.Workspac if err != nil { return "", err } - return fmt.Sprint(entity.InstancePoolId), nil + return entity.InstancePoolId, nil } func (l resolveInstancePool) String() string { - return fmt.Sprintf("instance-pool: %s", l.name) + return "instance-pool: " + l.name } diff --git a/bundle/config/variable/resolve_job.go b/bundle/config/variable/resolve_job.go index 3def64888..4fe6ae3e7 100644 --- a/bundle/config/variable/resolve_job.go +++ b/bundle/config/variable/resolve_job.go @@ -2,7 +2,7 @@ package variable import ( "context" - "fmt" + "strconv" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +16,9 @@ func (l resolveJob) Resolve(ctx context.Context, w *databricks.WorkspaceClient) if err != nil { return "", err } - return fmt.Sprint(entity.JobId), nil + return strconv.FormatInt(entity.JobId, 10), nil } func (l resolveJob) String() string { - return fmt.Sprintf("job: %s", l.name) + return "job: " + l.name } diff --git a/bundle/config/variable/resolve_metastore.go b/bundle/config/variable/resolve_metastore.go index 958e43787..8a0a8c7ed 100644 --- a/bundle/config/variable/resolve_metastore.go +++ b/bundle/config/variable/resolve_metastore.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveMetastore) Resolve(ctx context.Context, w *databricks.WorkspaceCl if err != nil { return "", err } - return fmt.Sprint(entity.MetastoreId), nil + return entity.MetastoreId, nil } func (l resolveMetastore) String() string { - return fmt.Sprintf("metastore: %s", l.name) + return "metastore: " + l.name } diff --git a/bundle/config/variable/resolve_notification_destination.go b/bundle/config/variable/resolve_notification_destination.go index 4c4cd892a..4696a52c8 100644 --- a/bundle/config/variable/resolve_notification_destination.go +++ b/bundle/config/variable/resolve_notification_destination.go @@ -42,5 +42,5 @@ func (l resolveNotificationDestination) Resolve(ctx context.Context, w *databric } func (l resolveNotificationDestination) String() string { - return fmt.Sprintf("notification-destination: %s", l.name) + return "notification-destination: " + l.name } diff --git 
a/bundle/config/variable/resolve_notification_destination_test.go b/bundle/config/variable/resolve_notification_destination_test.go index 2b8201d15..f44b2f3e9 100644 --- a/bundle/config/variable/resolve_notification_destination_test.go +++ b/bundle/config/variable/resolve_notification_destination_test.go @@ -2,7 +2,7 @@ package variable import ( "context" - "fmt" + "errors" "testing" "github.com/databricks/databricks-sdk-go/experimental/mocks" @@ -35,7 +35,7 @@ func TestResolveNotificationDestination_ResolveError(t *testing.T) { api := m.GetMockNotificationDestinationsAPI() api.EXPECT(). ListAll(mock.Anything, mock.Anything). - Return(nil, fmt.Errorf("bad")) + Return(nil, errors.New("bad")) ctx := context.Background() l := resolveNotificationDestination{name: "destination"} diff --git a/bundle/config/variable/resolve_pipeline.go b/bundle/config/variable/resolve_pipeline.go index cabc620da..33b14530d 100644 --- a/bundle/config/variable/resolve_pipeline.go +++ b/bundle/config/variable/resolve_pipeline.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolvePipeline) Resolve(ctx context.Context, w *databricks.WorkspaceCli if err != nil { return "", err } - return fmt.Sprint(entity.PipelineId), nil + return entity.PipelineId, nil } func (l resolvePipeline) String() string { - return fmt.Sprintf("pipeline: %s", l.name) + return "pipeline: " + l.name } diff --git a/bundle/config/variable/resolve_query.go b/bundle/config/variable/resolve_query.go index 602ff8deb..88f653dc6 100644 --- a/bundle/config/variable/resolve_query.go +++ b/bundle/config/variable/resolve_query.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveQuery) Resolve(ctx context.Context, w *databricks.WorkspaceClient if err != nil { return "", err } - return fmt.Sprint(entity.Id), nil + return entity.Id, nil } func (l resolveQuery) String() string { - return fmt.Sprintf("query: %s", l.name) + return "query: " + l.name } diff --git a/bundle/config/variable/resolve_service_principal.go b/bundle/config/variable/resolve_service_principal.go index 3bea4314b..03b8e3089 100644 --- a/bundle/config/variable/resolve_service_principal.go +++ b/bundle/config/variable/resolve_service_principal.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveServicePrincipal) Resolve(ctx context.Context, w *databricks.Work if err != nil { return "", err } - return fmt.Sprint(entity.ApplicationId), nil + return entity.ApplicationId, nil } func (l resolveServicePrincipal) String() string { - return fmt.Sprintf("service-principal: %s", l.name) + return "service-principal: " + l.name } diff --git a/bundle/config/variable/resolve_warehouse.go b/bundle/config/variable/resolve_warehouse.go index fbd3663a2..cabdb1160 100644 --- a/bundle/config/variable/resolve_warehouse.go +++ b/bundle/config/variable/resolve_warehouse.go @@ -2,7 +2,6 @@ package variable import ( "context" - "fmt" "github.com/databricks/databricks-sdk-go" ) @@ -16,9 +15,9 @@ func (l resolveWarehouse) Resolve(ctx context.Context, w *databricks.WorkspaceCl if err != nil { return "", err } - return fmt.Sprint(entity.Id), nil + return entity.Id, nil } func (l resolveWarehouse) String() string { - return fmt.Sprintf("warehouse: %s", l.name) + return "warehouse: " + l.name } diff --git a/bundle/config/variable/variable.go 
b/bundle/config/variable/variable.go index 2362ad10d..d7f1cdede 100644 --- a/bundle/config/variable/variable.go +++ b/bundle/config/variable/variable.go @@ -1,6 +1,7 @@ package variable import ( + "errors" "fmt" "reflect" ) @@ -35,11 +36,12 @@ type Variable struct { // This field stores the resolved value for the variable. The variable are // resolved in the following priority order (from highest to lowest) // - // 1. Command line flag. For example: `--var="foo=bar"` - // 2. Target variable. eg: BUNDLE_VAR_foo=bar - // 3. Default value as defined in the applicable environments block - // 4. Default value defined in variable definition - // 5. Throw error, since if no default value is defined, then the variable + // 1. Command line flag `--var="foo=bar"` + // 2. Environment variable. eg: BUNDLE_VAR_foo=bar + // 3. Load defaults from .databricks/bundle//variable-overrides.json + // 4. Default value as defined in the applicable targets block + // 5. Default value defined in variable definition + // 6. Throw error, since if no default value is defined, then the variable // is required Value VariableValue `json:"value,omitempty" bundle:"readonly"` @@ -68,7 +70,7 @@ func (v *Variable) Set(val VariableValue) error { switch rv.Kind() { case reflect.Struct, reflect.Array, reflect.Slice, reflect.Map: if v.Type != VariableTypeComplex { - return fmt.Errorf("variable type is not complex") + return errors.New("variable type is not complex") } } diff --git a/bundle/context_test.go b/bundle/context_test.go index 3a0f159d9..89a6df052 100644 --- a/bundle/context_test.go +++ b/bundle/context_test.go @@ -12,7 +12,7 @@ func TestGetPanics(t *testing.T) { defer func() { r := recover() require.NotNil(t, r, "The function did not panic") - assert.Equal(t, r, "context not configured with bundle") + assert.Equal(t, "context not configured with bundle", r) }() Get(context.Background()) diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index bc8767de4..633d97081 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -54,5 +54,9 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata + if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + b.Metadata.Config.Workspace.FilePath = b.SyncRootPath + } return nil } diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index 2c2c72376..c6fa9bddb 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -97,3 +97,24 @@ func TestComputeMetadataMutator(t *testing.T) { assert.Equal(t, expectedMetadata, b.Metadata) } + +func TestComputeMetadataMutatorSourceLinked(t *testing.T) { + syncRootPath := "/Users/shreyas.goenka@databricks.com/source" + enabled := true + b := &bundle.Bundle{ + SyncRootPath: syncRootPath, + Config: config.Root{ + Presets: config.Presets{ + SourceLinkedDeployment: &enabled, + }, + Workspace: config.Workspace{ + FilePath: "/Users/shreyas.goenka@databricks.com/files", + }, + }, + } + + diags := bundle.Apply(context.Background(), b, Compute()) + require.NoError(t, diags.Error()) + + assert.Equal(t, syncRootPath, b.Metadata.Config.Workspace.FilePath) +} diff --git a/bundle/deploy/state.go b/bundle/deploy/state.go 
index a131ab9c3..6e285034a 100644 --- a/bundle/deploy/state.go +++ b/bundle/deploy/state.go @@ -3,6 +3,7 @@ package deploy import ( "context" "encoding/json" + "errors" "fmt" "io" "io/fs" @@ -95,7 +96,7 @@ func (e *entry) Type() fs.FileMode { func (e *entry) Info() (fs.FileInfo, error) { if e.info == nil { - return nil, fmt.Errorf("no info available") + return nil, errors.New("no info available") } return e.info, nil } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index 36c49fb01..f38b71f6b 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "encoding/json" - "errors" "io" "io/fs" "os" @@ -279,7 +278,7 @@ func TestStatePullNoState(t *testing.T) { require.NoError(t, err) _, err = os.Stat(statePath) - require.True(t, errors.Is(err, fs.ErrNotExist)) + require.ErrorIs(t, err, fs.ErrNotExist) } func TestStatePullOlderState(t *testing.T) { diff --git a/bundle/deploy/state_update_test.go b/bundle/deploy/state_update_test.go index e561f534e..04c5579a8 100644 --- a/bundle/deploy/state_update_test.go +++ b/bundle/deploy/state_update_test.go @@ -60,7 +60,7 @@ func TestStateUpdate(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(1), state.Seq) - require.Equal(t, state.Files, Filelist{ + require.Equal(t, Filelist{ { LocalPath: "test1.py", }, @@ -68,7 +68,7 @@ func TestStateUpdate(t *testing.T) { LocalPath: "test2.py", IsNotebook: true, }, - }) + }, state.Files) require.Equal(t, build.GetInfo().Version, state.CliVersion) diags = bundle.Apply(ctx, b, s) @@ -79,7 +79,7 @@ func TestStateUpdate(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(2), state.Seq) - require.Equal(t, state.Files, Filelist{ + require.Equal(t, Filelist{ { LocalPath: "test1.py", }, @@ -87,7 +87,7 @@ func TestStateUpdate(t *testing.T) { LocalPath: "test2.py", IsNotebook: true, }, - }) + }, state.Files) require.Equal(t, build.GetInfo().Version, state.CliVersion) // Valid non-empty UUID is generated. @@ -130,7 +130,7 @@ func TestStateUpdateWithExistingState(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(11), state.Seq) - require.Equal(t, state.Files, Filelist{ + require.Equal(t, Filelist{ { LocalPath: "test1.py", }, @@ -138,7 +138,7 @@ func TestStateUpdateWithExistingState(t *testing.T) { LocalPath: "test2.py", IsNotebook: true, }, - }) + }, state.Files) require.Equal(t, build.GetInfo().Version, state.CliVersion) // Existing UUID is not overwritten. diff --git a/bundle/deploy/terraform/check_dashboards_modified_remotely.go b/bundle/deploy/terraform/check_dashboards_modified_remotely.go index f263e8a7f..66914af54 100644 --- a/bundle/deploy/terraform/check_dashboards_modified_remotely.go +++ b/bundle/deploy/terraform/check_dashboards_modified_remotely.go @@ -72,7 +72,7 @@ func (l *checkDashboardsModifiedRemotely) Apply(ctx context.Context, b *bundle.B continue } - path := dyn.MustPathFromString(fmt.Sprintf("resources.dashboards.%s", dashboard.Name)) + path := dyn.MustPathFromString("resources.dashboards." 
+ dashboard.Name) loc := b.Config.GetLocation(path.String()) actual, err := b.WorkspaceClient().Lakeview.GetByDashboardId(ctx, dashboard.ID) if err != nil { diff --git a/bundle/deploy/terraform/check_dashboards_modified_remotely_test.go b/bundle/deploy/terraform/check_dashboards_modified_remotely_test.go index 1bed3b1be..46bdc1f38 100644 --- a/bundle/deploy/terraform/check_dashboards_modified_remotely_test.go +++ b/bundle/deploy/terraform/check_dashboards_modified_remotely_test.go @@ -2,7 +2,7 @@ package terraform import ( "context" - "fmt" + "errors" "path/filepath" "testing" @@ -122,7 +122,7 @@ func TestCheckDashboardsModifiedRemotely_ExistingStateFailureToGet(t *testing.T) dashboardsAPI := m.GetMockLakeviewAPI() dashboardsAPI.EXPECT(). GetByDashboardId(mock.Anything, "id1"). - Return(nil, fmt.Errorf("failure")). + Return(nil, errors.New("failure")). Once() b.SetWorkpaceClient(m.WorkspaceClient) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index b710c690f..d549b9797 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform/tfdyn" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/service/apps" tfjson "github.com/hashicorp/terraform-json" ) @@ -196,6 +197,20 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.Dashboards[resource.Name] = cur + case "databricks_app": + if config.Resources.Apps == nil { + config.Resources.Apps = make(map[string]*resources.App) + } + cur := config.Resources.Apps[resource.Name] + if cur == nil { + cur = &resources.App{ModifiedStatus: resources.ModifiedStatusDeleted, App: &apps.App{}} + } else { + // If the app exists in terraform and bundle, we always set modified status to updated + // because we don't really know if the app source code was updated or not. + cur.ModifiedStatus = resources.ModifiedStatusUpdated + } + cur.Name = instance.Attributes.Name + config.Resources.Apps[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. 
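// Note on the databricks_app case above and the loop added in the next hunk:
// apps follow the same three-way reconciliation as the other resource types.
// A resource found only in the Terraform state is marked ModifiedStatusDeleted,
// one found in both the state and the bundle configuration is marked
// ModifiedStatusUpdated, and one found only in the configuration is marked
// ModifiedStatusCreated.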
@@ -260,6 +275,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Apps { + if src.ModifiedStatus == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index 61f26c088..ffe55db71 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/dashboards" @@ -254,10 +255,10 @@ func TestBundleToTerraformPipeline(t *testing.T) { assert.Equal(t, "my pipeline", resource.Name) assert.Len(t, resource.Library, 2) assert.Len(t, resource.Notification, 2) - assert.Equal(t, resource.Notification[0].Alerts, []string{"on-update-fatal-failure"}) - assert.Equal(t, resource.Notification[0].EmailRecipients, []string{"jane@doe.com"}) - assert.Equal(t, resource.Notification[1].Alerts, []string{"on-update-failure", "on-flow-failure"}) - assert.Equal(t, resource.Notification[1].EmailRecipients, []string{"jane@doe.com", "john@doe.com"}) + assert.Equal(t, []string{"on-update-fatal-failure"}, resource.Notification[0].Alerts) + assert.Equal(t, []string{"jane@doe.com"}, resource.Notification[0].EmailRecipients) + assert.Equal(t, []string{"on-update-failure", "on-flow-failure"}, resource.Notification[1].Alerts) + assert.Equal(t, []string{"jane@doe.com", "john@doe.com"}, resource.Notification[1].EmailRecipients) assert.Nil(t, out.Data) } @@ -454,7 +455,7 @@ func TestBundleToTerraformModelServing(t *testing.T) { assert.Equal(t, "name", resource.Name) assert.Equal(t, "model_name", resource.Config.ServedModels[0].ModelName) assert.Equal(t, "1", resource.Config.ServedModels[0].ModelVersion) - assert.Equal(t, true, resource.Config.ServedModels[0].ScaleToZeroEnabled) + assert.True(t, resource.Config.ServedModels[0].ScaleToZeroEnabled) assert.Equal(t, "Small", resource.Config.ServedModels[0].WorkloadSize) assert.Equal(t, "model_name-1", resource.Config.TrafficConfig.Routes[0].ServedModelName) assert.Equal(t, 100, resource.Config.TrafficConfig.Routes[0].TrafficPercentage) @@ -694,6 +695,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "app1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -732,6 +741,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Dashboards["test_dashboard"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Dashboards["test_dashboard"].ModifiedStatus) + assert.Equal(t, "app1", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Apps["test_app"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -815,6 +827,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Apps: map[string]*resources.App{ + "test_app": { + App: &apps.App{ + Description: 
"test_app", + }, + }, + }, }, } tfState := resourcesState{ @@ -856,6 +875,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.Dashboards["test_dashboard"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Dashboards["test_dashboard"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Apps["test_app"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -994,6 +1016,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Apps: map[string]*resources.App{ + "test_app": { + App: &apps.App{ + Name: "test_app", + }, + }, + "test_app_new": { + App: &apps.App{ + Name: "test_app_new", + }, + }, + }, }, } tfState := resourcesState{ @@ -1174,6 +1208,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "2"}}, }, }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "test_app"}}, + }, + }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "test_app_old"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1256,12 +1306,19 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, "", config.Resources.Dashboards["test_dashboard_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Dashboards["test_dashboard_new"].ModifiedStatus) + assert.Equal(t, "test_app", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusUpdated, config.Resources.Apps["test_app"].ModifiedStatus) + assert.Equal(t, "test_app_old", config.Resources.Apps["test_app_old"].Name) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Apps["test_app_old"].ModifiedStatus) + assert.Equal(t, "test_app_new", config.Resources.Apps["test_app_new"].Name) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Apps["test_app_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } func AssertFullResourceCoverage(t *testing.T, config *config.Root) { resources := reflect.ValueOf(config.Resources) - for i := 0; i < resources.NumField(); i++ { + for i := range resources.NumField() { field := resources.Field(i) if field.Kind() == reflect.Map { assert.True( diff --git a/bundle/deploy/terraform/import.go b/bundle/deploy/terraform/import.go index 0a1d1b9ce..a0604e71d 100644 --- a/bundle/deploy/terraform/import.go +++ b/bundle/deploy/terraform/import.go @@ -7,6 +7,7 @@ import ( "io" "os" "path/filepath" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/cmdio" @@ -67,7 +68,7 @@ func (m *importResource) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn if changed && !m.opts.AutoApprove { output := buf.String() // Remove output starting from Warning until end of output - output = output[:bytes.Index([]byte(output), []byte("Warning:"))] + output = output[:strings.Index(output, "Warning:")] cmdio.LogString(ctx, output) if !cmdio.IsPromptSupported(ctx) { diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index 366f0be8c..5957611a4 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -54,7 +54,7 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } 
- binDir, err := b.CacheDir(context.Background(), "bin") + binDir, err := b.CacheDir(ctx, "bin") if err != nil { return "", err } @@ -88,41 +88,43 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } -// This function inherits some environment variables for Terraform CLI. -func inheritEnvVars(ctx context.Context, environ map[string]string) error { +var envCopy = []string{ // Include $HOME in set of environment variables to pass along. - home, ok := env.Lookup(ctx, "HOME") - if ok { - environ["HOME"] = home - } + "HOME", // Include $USERPROFILE in set of environment variables to pass along. // This variable is used by Azure CLI on Windows to find stored credentials and metadata - userProfile, ok := env.Lookup(ctx, "USERPROFILE") - if ok { - environ["USERPROFILE"] = userProfile - } + "USERPROFILE", // Include $PATH in set of environment variables to pass along. // This is necessary to ensure that our Terraform provider can use the // same auxiliary programs (e.g. `az`, or `gcloud`) as the CLI. - path, ok := env.Lookup(ctx, "PATH") - if ok { - environ["PATH"] = path - } + "PATH", // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. // This is set in Azure DevOps by the AzureCLI@2 task. - azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") - if ok { - environ["AZURE_CONFIG_FILE"] = azureConfigFile - } + "AZURE_CONFIG_FILE", // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. // See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration - devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE") - if ok { - environ["TF_CLI_CONFIG_FILE"] = devConfigFile + "TF_CLI_CONFIG_FILE", + + // Include $USE_SDK_V2_RESOURCES and $USE_SDK_V2_DATA_SOURCES, these are used to switch back from plugin framework to SDKv2. + // This is used for mitigation issues with resource migrated to plugin framework, as recommended here: + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/troubleshooting#plugin-framework-migration-problems + // It is currently a workaround for deploying quality_monitors + // https://github.com/databricks/terraform-provider-databricks/issues/4229#issuecomment-2520344690 + "USE_SDK_V2_RESOURCES", + "USE_SDK_V2_DATA_SOURCES", +} + +// This function inherits some environment variables for Terraform CLI. +func inheritEnvVars(ctx context.Context, environ map[string]string) error { + for _, key := range envCopy { + value, ok := env.Lookup(ctx, key) + if ok { + environ[key] = value + } } // Map $DATABRICKS_TF_CLI_CONFIG_FILE to $TF_CLI_CONFIG_FILE @@ -230,9 +232,13 @@ func setUserAgentExtraEnvVar(environ map[string]string, b *bundle.Bundle) error // Add "cli" to the user agent in set by the Databricks Terraform provider. // This will allow us to attribute downstream requests made by the Databricks // Terraform provider to the CLI. 
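// The resulting value is written to DATABRICKS_USER_AGENT_EXTRA and looks like
// "cli/0.0.0-dev databricks-pydabs/0.7.0" when Python support is configured
// (see TestSetUserAgentExtraEnvVar_Python below).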
- products := []string{fmt.Sprintf("cli/%s", build.GetInfo().Version)} + products := []string{"cli/" + build.GetInfo().Version} if experimental := b.Config.Experimental; experimental != nil { - if experimental.PyDABs.Enabled { + hasPython := experimental.Python.Resources != nil || experimental.Python.Mutators != nil + + if hasPython { + products = append(products, "databricks-pydabs/0.7.0") + } else if experimental.PyDABs.Enabled { products = append(products, "databricks-pydabs/0.0.0") } } diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go index a1ffc5a1a..c7a4ffe4a 100644 --- a/bundle/deploy/terraform/init_test.go +++ b/bundle/deploy/terraform/init_test.go @@ -225,7 +225,7 @@ func TestSetProxyEnvVars(t *testing.T) { env := make(map[string]string, 0) err := setProxyEnvVars(context.Background(), env, b) require.NoError(t, err) - assert.Len(t, env, 0) + assert.Empty(t, env) // Lower case set. clearEnv() @@ -248,7 +248,7 @@ func TestSetProxyEnvVars(t *testing.T) { assert.ElementsMatch(t, []string{"HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"}, maps.Keys(env)) } -func TestSetUserAgentExtraEnvVar(t *testing.T) { +func TestSetUserAgentExtraEnvVar_PyDABs(t *testing.T) { b := &bundle.Bundle{ BundleRootPath: t.TempDir(), Config: config.Root{ @@ -268,6 +268,26 @@ func TestSetUserAgentExtraEnvVar(t *testing.T) { }, env) } +func TestSetUserAgentExtraEnvVar_Python(t *testing.T) { + b := &bundle.Bundle{ + BundleRootPath: t.TempDir(), + Config: config.Root{ + Experimental: &config.Experimental{ + Python: config.Python{ + Resources: []string{"my_project.resources:load_resources"}, + }, + }, + }, + } + + env := make(map[string]string, 0) + err := setUserAgentExtraEnvVar(env, b) + require.NoError(t, err) + assert.Equal(t, map[string]string{ + "DATABRICKS_USER_AGENT_EXTRA": "cli/0.0.0-dev databricks-pydabs/0.7.0", + }, env) +} + func TestInheritEnvVars(t *testing.T) { t.Setenv("HOME", "/home/testuser") t.Setenv("PATH", "/foo:/bar") @@ -293,7 +313,7 @@ func TestSetUserProfileFromInheritEnvVars(t *testing.T) { require.NoError(t, err) assert.Contains(t, env, "USERPROFILE") - assert.Equal(t, env["USERPROFILE"], "c:\\foo\\c") + assert.Equal(t, "c:\\foo\\c", env["USERPROFILE"]) } func TestInheritEnvVarsWithAbsentTFConfigFile(t *testing.T) { diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index 813e6bbb7..719e6ad25 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -63,6 +63,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_cluster")).Append(path[2:]...) case dyn.Key("dashboards"): path = dyn.NewPath(dyn.Key("databricks_dashboard")).Append(path[2:]...) + case dyn.Key("apps"): + path = dyn.NewPath(dyn.Key("databricks_app")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. 
return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index fc5c4d184..91a7bd54a 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -34,6 +34,7 @@ func TestInterpolate(t *testing.T) { "other_volume": "${resources.volumes.other_volume.id}", "other_cluster": "${resources.clusters.other_cluster.id}", "other_dashboard": "${resources.dashboards.other_dashboard.id}", + "other_app": "${resources.apps.other_app.id}", }, Tasks: []jobs.Task{ { @@ -73,6 +74,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_volume.other_volume.id}", j.Tags["other_volume"]) assert.Equal(t, "${databricks_cluster.other_cluster.id}", j.Tags["other_cluster"]) assert.Equal(t, "${databricks_dashboard.other_dashboard.id}", j.Tags["other_dashboard"]) + assert.Equal(t, "${databricks_app.other_app.id}", j.Tags["other_app"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/load.go b/bundle/deploy/terraform/load.go index 3fb76855e..1c563fa77 100644 --- a/bundle/deploy/terraform/load.go +++ b/bundle/deploy/terraform/load.go @@ -2,6 +2,7 @@ package terraform import ( "context" + "errors" "fmt" "slices" @@ -58,7 +59,7 @@ func (l *load) validateState(state *resourcesState) error { } if len(state.Resources) == 0 && slices.Contains(l.modes, ErrorOnEmptyState) { - return fmt.Errorf("no deployment state. Did you forget to run 'databricks bundle deploy'?") + return errors.New("no deployment state. Did you forget to run 'databricks bundle deploy'?") } return nil diff --git a/bundle/deploy/terraform/state_push_test.go b/bundle/deploy/terraform/state_push_test.go index 4cc52b7a7..54e7f621c 100644 --- a/bundle/deploy/terraform/state_push_test.go +++ b/bundle/deploy/terraform/state_push_test.go @@ -71,7 +71,7 @@ func TestStatePushLargeState(t *testing.T) { b := statePushTestBundle(t) largeState := map[string]any{} - for i := 0; i < 1000000; i++ { + for i := range 1000000 { largeState[fmt.Sprintf("field_%d", i)] = i } diff --git a/bundle/deploy/terraform/tfdyn/convert_app.go b/bundle/deploy/terraform/tfdyn/convert_app.go new file mode 100644 index 000000000..dcba0809b --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_app.go @@ -0,0 +1,55 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +func convertAppResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Check if the description is not set and if it's not, set it to an empty string. + // This is done to avoid TF drift because Apps API return empty string for description when if it's not set. + if _, err := dyn.Get(vin, "description"); err != nil { + vin, err = dyn.Set(vin, "description", dyn.V("")) + if err != nil { + return vin, err + } + } + + // Normalize the output value to the target schema. 
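// CLI-only fields on resources.App (SourceCodePath, Config, Permissions) are
// not part of apps.App, so this normalization drops them from the Terraform
// output; permissions are converted separately into a databricks_permissions
// resource below. The expected maps in convert_app_test.go show only name,
// description and resources surviving this step.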
+ vout, diags := convert.Normalize(apps.App{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "app normalization diagnostic: %s", diag.Summary) + } + + return vout, nil +} + +type appConverter struct{} + +func (appConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertAppResource(ctx, vin) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.App[key] = vout.AsAny() + + // Configure permissions for this resource. + if permissions := convertPermissionsResource(ctx, vin); permissions != nil { + permissions.AppName = fmt.Sprintf("${databricks_app.%s.name}", key) + out.Permissions["app_"+key] = permissions + } + + return nil +} + +func init() { + registerConverter("apps", appConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_app_test.go b/bundle/deploy/terraform/tfdyn/convert_app_test.go new file mode 100644 index 000000000..be8152cc6 --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_app_test.go @@ -0,0 +1,156 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertApp(t *testing.T) { + src := resources.App{ + SourceCodePath: "./app", + Config: map[string]any{ + "command": []string{"python", "app.py"}, + }, + App: &apps.App{ + Name: "app_id", + Description: "app description", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + Permission: "CAN_USE", + }, + }, + }, + }, + Permissions: []resources.Permission{ + { + Level: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + Level: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = appConverter{}.Convert(ctx, "my_app", vin, out) + require.NoError(t, err) + + app := out.App["my_app"] + assert.Equal(t, map[string]any{ + "description": "app description", + "name": "app_id", + "resources": []any{ + map[string]any{ + "name": "job1", + "job": map[string]any{ + "id": "1234", + "permission": "CAN_MANAGE_RUN", + }, + }, + map[string]any{ + "name": "sql1", + "sql_warehouse": map[string]any{ + "id": "5678", + "permission": "CAN_USE", + }, + }, + }, + }, app) + + // Assert equality on the permissions + assert.Equal(t, &schema.ResourcePermissions{ + AppName: "${databricks_app.my_app.name}", + AccessControl: []schema.ResourcePermissionsAccessControl{ + { + PermissionLevel: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + PermissionLevel: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + }, out.Permissions["app_my_app"]) +} + +func TestConvertAppWithNoDescription(t *testing.T) { + src := resources.App{ + SourceCodePath: "./app", + Config: map[string]any{ + "command": []string{"python", "app.py"}, + }, + App: &apps.App{ + Name: "app_id", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + 
Permission: "CAN_USE", + }, + }, + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = appConverter{}.Convert(ctx, "my_app", vin, out) + require.NoError(t, err) + + app := out.App["my_app"] + assert.Equal(t, map[string]any{ + "name": "app_id", + "description": "", // Due to Apps API always returning a description field, we set it in the output as well to avoid permanent TF drift + "resources": []any{ + map[string]any{ + "name": "job1", + "job": map[string]any{ + "id": "1234", + "permission": "CAN_MANAGE_RUN", + }, + }, + map[string]any{ + "name": "sql1", + "sql_warehouse": map[string]any{ + "id": "5678", + "permission": "CAN_USE", + }, + }, + }, + }, app) +} diff --git a/bundle/deploy/terraform/util.go b/bundle/deploy/terraform/util.go index 4da015c23..90dfe37b2 100644 --- a/bundle/deploy/terraform/util.go +++ b/bundle/deploy/terraform/util.go @@ -33,7 +33,12 @@ type stateResourceInstance struct { } type stateInstanceAttributes struct { - ID string `json:"id"` + ID string `json:"id"` + + // Some resources such as Apps do not have an ID, so we use the name instead. + // We need this for cases when such resource is removed from bundle config but + // exists in the workspace still so we can correctly display its summary. + Name string `json:"name,omitempty"` ETag string `json:"etag,omitempty"` } diff --git a/bundle/deploy/terraform/util_test.go b/bundle/deploy/terraform/util_test.go index 74b329259..5d1310392 100644 --- a/bundle/deploy/terraform/util_test.go +++ b/bundle/deploy/terraform/util_test.go @@ -97,7 +97,7 @@ func TestParseResourcesStateWithExistingStateFile(t *testing.T) { Type: "databricks_pipeline", Name: "test_pipeline", Instances: []stateResourceInstance{ - {Attributes: stateInstanceAttributes{ID: "123"}}, + {Attributes: stateInstanceAttributes{ID: "123", Name: "test_pipeline"}}, }, }, }, diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index aec5e68b0..91aaa4555 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -6,6 +6,7 @@ import ( "os" "reflect" "regexp" + "slices" "strings" yaml3 "gopkg.in/yaml.v3" @@ -119,7 +120,15 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error if err != nil { return err } - missingAnnotations, err := convert.FromTyped(&d.missingAnnotations, dyn.NilValue) + + for k := range d.missingAnnotations { + if !isCliPath(k) { + delete(d.missingAnnotations, k) + fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to fetch latest OpenAPI spec and regenerate annotations", k) + } + } + + missingAnnotations, err := convert.FromTyped(d.missingAnnotations, dyn.NilValue) if err != nil { return err } @@ -129,7 +138,13 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error return err } - err = saveYamlWithStyle(outputPath, output) + var outputTyped annotationFile + err = convert.ToTyped(&outputTyped, output) + if err != nil { + return err + } + + err = saveYamlWithStyle(outputPath, outputTyped) if err != nil { return err } @@ -153,21 +168,50 @@ func assignAnnotation(s *jsonschema.Schema, a annotation) { s.Enum = a.Enum } -func saveYamlWithStyle(outputPath string, input dyn.Value) error { +func saveYamlWithStyle(outputPath string, annotations annotationFile) error { + annotationOrder := yamlsaver.NewOrder([]string{"description", "markdown_description", "title", "default", "enum"}) style := 
map[string]yaml3.Style{} - file, _ := input.AsMap() - for _, v := range file.Keys() { - style[v.MustString()] = yaml3.LiteralStyle + + order := getAlphabeticalOrder(annotations) + dynMap := map[string]dyn.Value{} + for k, v := range annotations { + style[k] = yaml3.LiteralStyle + + properties := map[string]dyn.Value{} + propertiesOrder := getAlphabeticalOrder(v) + for key, value := range v { + d, err := convert.FromTyped(value, dyn.NilValue) + if d.Kind() == dyn.KindNil || err != nil { + properties[key] = dyn.NewValue(map[string]dyn.Value{}, []dyn.Location{{Line: propertiesOrder.Get(key)}}) + continue + } + val, err := yamlsaver.ConvertToMapValue(value, annotationOrder, []string{}, map[string]dyn.Value{}) + if err != nil { + return err + } + properties[key] = val.WithLocations([]dyn.Location{{Line: propertiesOrder.Get(key)}}) + } + + dynMap[k] = dyn.NewValue(properties, []dyn.Location{{Line: order.Get(k)}}) } saver := yamlsaver.NewSaverWithStyle(style) - err := saver.SaveAsYAML(file, outputPath, true) + err := saver.SaveAsYAML(dynMap, outputPath, true) if err != nil { return err } return nil } +func getAlphabeticalOrder[T any](mapping map[string]T) *yamlsaver.Order { + order := []string{} + for k := range mapping { + order = append(order, k) + } + slices.Sort(order) + return yamlsaver.NewOrder(order) +} + func convertLinksToAbsoluteUrl(s string) string { if s == "" { return s @@ -207,3 +251,7 @@ func convertLinksToAbsoluteUrl(s string) string { return result } + +func isCliPath(path string) bool { + return !strings.HasPrefix(path, "github.com/databricks/databricks-sdk-go") +} diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e52189daa..e18de7896 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -55,7 +55,7 @@ github.com/databricks/cli/bundle/config.Bundle: The name of the bundle. "uuid": "description": |- - PLACEHOLDER + Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). github.com/databricks/cli/bundle/config.Deployment: "fail_on_active_runs": "description": |- @@ -69,6 +69,9 @@ github.com/databricks/cli/bundle/config.Experimental: "pydabs": "description": |- The PyDABs configuration. + "python": + "description": |- + Configures loading of Python code defined with 'databricks-bundles' package. "python_wheel_wrapper": "description": |- Whether to use a Python wheel wrapper @@ -125,7 +128,28 @@ github.com/databricks/cli/bundle/config.PyDABs: "venv_path": "description": |- The Python virtual environment path +github.com/databricks/cli/bundle/config.Python: + "mutators": + "description": |- + Mutators contains a list of fully qualified function paths to mutator functions. + + Example: ["my_project.mutators:add_default_cluster"] + "resources": + "description": |- + Resources contains a list of fully qualified function paths to load resources + defined in Python code. + + Example: ["my_project.resources:load_resources"] + "venv_path": + "description": |- + VEnvPath is path to the virtual environment. + + If enabled, Python code will execute within this environment. If disabled, + it defaults to using the Python interpreter available in the current shell. 
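+# A minimal sketch of how the Python settings described above are expected to appear in a
+# bundle's databricks.yml; the ".venv" path is an assumed example value:
+#   experimental:
+#     python:
+#       venv_path: .venv
+#       resources:
+#         - "my_project.resources:load_resources"
+#       mutators:
+#         - "my_project.mutators:add_default_cluster"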
github.com/databricks/cli/bundle/config.Resources: + "apps": + "description": |- + PLACEHOLDER "clusters": "description": |- The cluster definitions for the bundle. @@ -350,6 +374,64 @@ github.com/databricks/cli/bundle/config.Workspace: "state_path": "description": |- The workspace state path +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + PLACEHOLDER + "app_status": + "description": |- + PLACEHOLDER + "compute_status": + "description": |- + PLACEHOLDER + "config": + "description": |- + PLACEHOLDER + "create_time": + "description": |- + PLACEHOLDER + "creator": + "description": |- + PLACEHOLDER + "default_source_code_path": + "description": |- + PLACEHOLDER + "description": + "description": |- + PLACEHOLDER + "name": + "description": |- + PLACEHOLDER + "pending_deployment": + "description": |- + PLACEHOLDER + "permissions": + "description": |- + PLACEHOLDER + "resources": + "description": |- + PLACEHOLDER + "service_principal_client_id": + "description": |- + PLACEHOLDER + "service_principal_id": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER + "update_time": + "description": |- + PLACEHOLDER + "updater": + "description": |- + PLACEHOLDER + "url": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Grant: "principal": "description": |- @@ -417,10 +499,10 @@ github.com/databricks/cli/bundle/config/variable.TargetVariable: "lookup": "description": |- The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID. - "type": + "markdown_description": "description": |- The type of the variable. - "markdown_description": + "type": "description": |- The type of the variable. github.com/databricks/cli/bundle/config/variable.Variable: @@ -438,64 +520,103 @@ github.com/databricks/cli/bundle/config/variable.Variable: "type": "description": |- The type of the variable. 
-github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: - "ai21labs_api_key": +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": "description": |- PLACEHOLDER - "ai21labs_api_key_plaintext": + "creator": "description": |- PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: - "private_key": + "deployment_artifacts": "description": |- PLACEHOLDER - "private_key_plaintext": + "deployment_id": "description": |- PLACEHOLDER - "project_id": + "mode": "description": |- PLACEHOLDER - "region": + "source_code_path": "description": |- PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig: - "microsoft_entra_client_id": + "status": "description": |- PLACEHOLDER - "microsoft_entra_client_secret": + "update_time": "description": |- PLACEHOLDER - "microsoft_entra_client_secret_plaintext": +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": "description": |- PLACEHOLDER - "microsoft_entra_tenant_id": +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": "description": |- PLACEHOLDER - "openai_api_base": + "state": "description": |- PLACEHOLDER - "openai_api_key": +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": "description": |- PLACEHOLDER - "openai_api_key_plaintext": + "job": "description": |- PLACEHOLDER - "openai_api_type": + "name": "description": |- PLACEHOLDER - "openai_api_version": + "secret": "description": |- PLACEHOLDER - "openai_deployment_name": + "serving_endpoint": "description": |- PLACEHOLDER - "openai_organization": + "sql_warehouse": "description": |- PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig: - "palm_api_key": +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": "description": |- PLACEHOLDER - "palm_api_key_plaintext": + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER + "scope": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + PLACEHOLDER + "state": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + PLACEHOLDER + "state": "description": |- PLACEHOLDER diff --git a/bundle/internal/schema/annotations_openapi.yml b/bundle/internal/schema/annotations_openapi.yml index d8f681a28..8ff5c9253 100644 --- a/bundle/internal/schema/annotations_openapi.yml +++ b/bundle/internal/schema/annotations_openapi.yml @@ -1,89 +1,132 @@ # This file is auto-generated. DO NOT EDIT. +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + The active deployment of the app. A deployment is considered active when it has been deployed + to the app compute. + "app_status": {} + "compute_status": {} + "create_time": + "description": |- + The creation time of the app. 
Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user that created the app. + "default_source_code_path": + "description": |- + The default workspace file system path of the source code from which app deployment are + created. This field tracks the workspace source code path of the last active deployment. + "description": + "description": |- + The description of the app. + "name": + "description": |- + The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. + It must be unique within the workspace. + "pending_deployment": + "description": |- + The pending deployment of the app. A deployment is considered pending when it is being prepared + for deployment to the app compute. + "resources": + "description": |- + Resources for the app. + "service_principal_client_id": {} + "service_principal_id": {} + "service_principal_name": {} + "update_time": + "description": |- + The update time of the app. Formatted timestamp in ISO 6801. + "updater": + "description": |- + The email of the user that last updated the app. + "url": + "description": |- + The URL of the app once it is deployed. github.com/databricks/cli/bundle/config/resources.Cluster: - apply_policy_default_values: - description: When set to true, fixed and default values from the policy will be - used for fields that are omitted. When set to false, only fixed values from - the policy will be applied. - autoscale: - description: |- + "apply_policy_default_values": + "description": |- + When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + "autoscale": + "description": |- Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. - autotermination_minutes: - description: |- + "autotermination_minutes": + "description": |- Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - aws_attributes: - description: |- + "aws_attributes": + "description": |- Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. - azure_attributes: - description: |- + "azure_attributes": + "description": |- Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - cluster_log_conf: - description: |- + "cluster_log_conf": + "description": |- The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - cluster_name: - description: | + "cluster_name": + "description": | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - custom_tags: - description: |- + "custom_tags": + "description": |- Additional tags for cluster resources. 
Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - data_security_mode: {} - docker_image: {} - driver_instance_pool_id: - description: |- + "data_security_mode": {} + "docker_image": {} + "driver_instance_pool_id": + "description": |- The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - driver_node_type_id: - description: | + "driver_node_type_id": + "description": | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. - enable_elastic_disk: - description: |- + "enable_elastic_disk": + "description": |- Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - enable_local_disk_encryption: - description: Whether to enable LUKS on cluster VMs' local disks - gcp_attributes: - description: |- + "enable_local_disk_encryption": + "description": |- + Whether to enable LUKS on cluster VMs' local disks + "gcp_attributes": + "description": |- Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. - init_scripts: - description: The configuration for storing init scripts. Any number of destinations - can be specified. The scripts are executed sequentially in the order provided. - If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - instance_pool_id: - description: The optional ID of the instance pool to which the cluster belongs. - is_single_node: - description: | + "init_scripts": + "description": |- + The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + "instance_pool_id": + "description": |- + The optional ID of the instance pool to which the cluster belongs. + "is_single_node": + "description": | This field can only be used with `kind`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - kind: {} - node_type_id: - description: | + "kind": {} + "node_type_id": + "description": | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - num_workers: - description: |- + "num_workers": + "description": |- Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. 
@@ -92,18 +135,20 @@ github.com/databricks/cli/bundle/config/resources.Cluster: is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - policy_id: - description: The ID of the cluster policy used to create the cluster if applicable. - runtime_engine: {} - single_user_name: - description: Single user name if data_security_mode is `SINGLE_USER` - spark_conf: - description: | + "policy_id": + "description": |- + The ID of the cluster policy used to create the cluster if applicable. + "runtime_engine": {} + "single_user_name": + "description": |- + Single user name if data_security_mode is `SINGLE_USER` + "spark_conf": + "description": | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - spark_env_vars: - description: |- + "spark_env_vars": + "description": |- An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. @@ -115,399 +160,643 @@ github.com/databricks/cli/bundle/config/resources.Cluster: Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - spark_version: - description: | + "spark_version": + "description": | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - ssh_public_keys: - description: |- + "ssh_public_keys": + "description": |- SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - use_ml_runtime: - description: | + "use_ml_runtime": + "description": | This field can only be used with `kind`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - workload_type: {} + "workload_type": {} github.com/databricks/cli/bundle/config/resources.Dashboard: - create_time: - description: The timestamp of when the dashboard was created. - dashboard_id: - description: UUID identifying the dashboard. - display_name: - description: The display name of the dashboard. - etag: - description: |- + "create_time": + "description": |- + The timestamp of when the dashboard was created. + "dashboard_id": + "description": |- + UUID identifying the dashboard. + "display_name": + "description": |- + The display name of the dashboard. + "etag": + "description": |- The etag for the dashboard. Can be optionally provided on updates to ensure that the dashboard has not been modified since the last read. This field is excluded in List Dashboards responses. - lifecycle_state: - description: The state of the dashboard resource. Used for tracking trashed status. - parent_path: - description: |- + "lifecycle_state": + "description": |- + The state of the dashboard resource. Used for tracking trashed status. 
+ "parent_path": + "description": |- The workspace path of the folder containing the dashboard. Includes leading slash and no trailing slash. This field is excluded in List Dashboards responses. - path: - description: |- + "path": + "description": |- The workspace path of the dashboard asset, including the file name. Exported dashboards always have the file extension `.lvdash.json`. This field is excluded in List Dashboards responses. - serialized_dashboard: - description: |- + "serialized_dashboard": + "description": |- The contents of the dashboard in serialized string form. This field is excluded in List Dashboards responses. Use the [get dashboard API](https://docs.databricks.com/api/workspace/lakeview/get) to retrieve an example response, which includes the `serialized_dashboard` field. This field provides the structure of the JSON string that represents the dashboard's layout and components. - update_time: - description: |- + "update_time": + "description": |- The timestamp of when the dashboard was last updated by the user. This field is excluded in List Dashboards responses. - warehouse_id: - description: The warehouse ID used to run the dashboard. + "warehouse_id": + "description": |- + The warehouse ID used to run the dashboard. github.com/databricks/cli/bundle/config/resources.Job: - budget_policy_id: - description: |- + "budget_policy_id": + "description": |- The id of the user specified budget policy to use for this job. If not specified, a default budget policy may be applied when creating or modifying the job. See `effective_budget_policy_id` for the budget policy used by this workload. - continuous: - description: An optional continuous property for this job. The continuous property - will ensure that there is always one run executing. Only one of `schedule` and - `continuous` can be used. - deployment: - description: Deployment information for jobs managed by external sources. - description: - description: An optional description for the job. The maximum length is 27700 - characters in UTF-8 encoding. - edit_mode: - description: |- + "continuous": + "description": |- + An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + "deployment": + "description": |- + Deployment information for jobs managed by external sources. + "description": + "description": |- + An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding. + "edit_mode": + "description": |- Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. - email_notifications: - description: An optional set of email addresses that is notified when runs of - this job begin or complete as well as when this job is deleted. - environments: - description: |- + "email_notifications": + "description": |- + An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. + "environments": + "description": |- A list of task execution environment specifications that can be referenced by serverless tasks of this job. An environment is required to be present for serverless tasks. For serverless notebook tasks, the environment is accessible in the notebook environment panel. For other serverless tasks, the task environment is required to be specified using environment_key in the task settings. 
- format: - description: Used to tell what is the format of the job. This field is ignored - in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always - set to `"MULTI_TASK"`. - git_source: - description: |- + "format": + "description": |- + Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + "git_source": + "description": |- An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - health: {} - job_clusters: - description: A list of job cluster specifications that can be shared and reused - by tasks of this job. Libraries cannot be declared in a shared job cluster. - You must declare dependent libraries in task settings. - max_concurrent_runs: - description: |- + "health": {} + "job_clusters": + "description": |- + A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. + "max_concurrent_runs": + "description": |- An optional maximum allowed number of concurrent runs of the job. Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped. - name: - description: An optional name for the job. The maximum length is 4096 bytes in - UTF-8 encoding. - notification_settings: - description: Optional notification settings that are used when sending notifications - to each of the `email_notifications` and `webhook_notifications` for this job. - parameters: - description: Job-level parameter definitions - queue: - description: The queue settings of the job. - run_as: {} - schedule: - description: An optional periodic schedule for this job. The default behavior - is that the job only runs when triggered by clicking “Run Now” in the Jobs UI - or sending an API request to `runNow`. - tags: - description: A map of tags associated with the job. These are forwarded to the - cluster as cluster tags for jobs clusters, and are subject to the same limitations - as cluster tags. A maximum of 25 tags can be added to the job. - tasks: - description: A list of task specifications to be executed by this job. - timeout_seconds: - description: An optional timeout applied to each run of this job. A value of `0` - means no timeout. 
- trigger: - description: A configuration to trigger a run when certain conditions are met. - The default behavior is that the job runs only when triggered by clicking “Run - Now” in the Jobs UI or sending an API request to `runNow`. - webhook_notifications: - description: A collection of system notification IDs to notify when runs of this - job begin or complete. + "name": + "description": |- + An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding. + "notification_settings": + "description": |- + Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + "parameters": + "description": |- + Job-level parameter definitions + "queue": + "description": |- + The queue settings of the job. + "run_as": {} + "schedule": + "description": |- + An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + "tags": + "description": |- + A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + "tasks": + "description": |- + A list of task specifications to be executed by this job. + If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. + "timeout_seconds": + "description": |- + An optional timeout applied to each run of this job. A value of `0` means no timeout. + "trigger": + "description": |- + A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + "webhook_notifications": + "description": |- + A collection of system notification IDs to notify when runs of this job begin or complete. github.com/databricks/cli/bundle/config/resources.MlflowExperiment: - artifact_location: - description: Location where artifacts for the experiment are stored. - creation_time: - description: Creation time - experiment_id: - description: Unique identifier for the experiment. - last_update_time: - description: Last update time - lifecycle_stage: - description: |- + "artifact_location": + "description": |- + Location where artifacts for the experiment are stored. + "creation_time": + "description": |- + Creation time + "experiment_id": + "description": |- + Unique identifier for the experiment. + "last_update_time": + "description": |- + Last update time + "lifecycle_stage": + "description": |- Current life cycle stage of the experiment: "active" or "deleted". Deleted experiments are not returned by APIs. - name: - description: Human readable name that identifies the experiment. - tags: - description: 'Tags: Additional metadata key-value pairs.' + "name": + "description": |- + Human readable name that identifies the experiment. + "tags": + "description": |- + Tags: Additional metadata key-value pairs. github.com/databricks/cli/bundle/config/resources.MlflowModel: - creation_timestamp: - description: Timestamp recorded when this `registered_model` was created. - description: - description: Description of this `registered_model`. - last_updated_timestamp: - description: Timestamp recorded when metadata for this `registered_model` was - last updated. 
- latest_versions: - description: |- + "creation_timestamp": + "description": |- + Timestamp recorded when this `registered_model` was created. + "description": + "description": |- + Description of this `registered_model`. + "last_updated_timestamp": + "description": |- + Timestamp recorded when metadata for this `registered_model` was last updated. + "latest_versions": + "description": |- Collection of latest model versions for each stage. Only contains models with current `READY` status. - name: - description: Unique name for the model. - tags: - description: 'Tags: Additional metadata key-value pairs for this `registered_model`.' - user_id: - description: User that created this `registered_model` + "name": + "description": |- + Unique name for the model. + "tags": + "description": |- + Tags: Additional metadata key-value pairs for this `registered_model`. + "user_id": + "description": |- + User that created this `registered_model` github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: - ai_gateway: - description: 'The AI Gateway configuration for the serving endpoint. NOTE: only - external model endpoints are supported as of now.' - config: - description: The core config of the serving endpoint. - name: - description: | + "ai_gateway": + "description": |- + The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now. + "config": + "description": |- + The core config of the serving endpoint. + "name": + "description": | The name of the serving endpoint. This field is required and must be unique across a Databricks workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. - rate_limits: - description: 'Rate limits to be applied to the serving endpoint. NOTE: this field - is deprecated, please use AI Gateway to manage rate limits.' - route_optimized: - description: Enable route optimization for the serving endpoint. - tags: - description: Tags to be attached to the serving endpoint and automatically propagated - to billing logs. + "rate_limits": + "description": |- + Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits. + "route_optimized": + "description": |- + Enable route optimization for the serving endpoint. + "tags": + "description": |- + Tags to be attached to the serving endpoint and automatically propagated to billing logs. github.com/databricks/cli/bundle/config/resources.Pipeline: - budget_policy_id: - description: Budget policy of this pipeline. - catalog: - description: A catalog in Unity Catalog to publish data from this pipeline to. - If `target` is specified, tables in this pipeline are published to a `target` - schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` - is not specified, no data is published to Unity Catalog. - channel: - description: DLT Release Channel that specifies which version to use. - clusters: - description: Cluster settings for this pipeline deployment. - configuration: - description: String-String configuration for this pipeline execution. - continuous: - description: Whether the pipeline is continuous or triggered. This replaces `trigger`. - deployment: - description: Deployment type of this pipeline. - development: - description: Whether the pipeline is in Development mode. Defaults to false. - edition: - description: Pipeline product edition. - filters: - description: Filters on which Pipeline packages to include in the deployed graph. 
- gateway_definition: - description: The definition of a gateway pipeline to support change data capture. - id: - description: Unique identifier for this pipeline. - ingestion_definition: - description: The configuration for a managed ingestion pipeline. These settings - cannot be used with the 'libraries', 'target' or 'catalog' settings. - libraries: - description: Libraries or code needed by this deployment. - name: - description: Friendly identifier for this pipeline. - notifications: - description: List of notification settings for this pipeline. - photon: - description: Whether Photon is enabled for this pipeline. - restart_window: - description: Restart window of this pipeline. - schema: - description: The default schema (database) where tables are read from or published - to. The presence of this field implies that the pipeline is in direct publishing - mode. - serverless: - description: Whether serverless compute is enabled for this pipeline. - storage: - description: DBFS root directory for storing checkpoints and tables. - target: - description: Target schema (database) to add tables in this pipeline to. If not - specified, no data is published to the Hive metastore or Unity Catalog. To publish - to Unity Catalog, also specify `catalog`. - trigger: - description: 'Which pipeline trigger to use. Deprecated: Use `continuous` instead.' + "budget_policy_id": + "description": |- + Budget policy of this pipeline. + "catalog": + "description": |- + A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + "channel": + "description": |- + DLT Release Channel that specifies which version to use. + "clusters": + "description": |- + Cluster settings for this pipeline deployment. + "configuration": + "description": |- + String-String configuration for this pipeline execution. + "continuous": + "description": |- + Whether the pipeline is continuous or triggered. This replaces `trigger`. + "deployment": + "description": |- + Deployment type of this pipeline. + "development": + "description": |- + Whether the pipeline is in Development mode. Defaults to false. + "edition": + "description": |- + Pipeline product edition. + "filters": + "description": |- + Filters on which Pipeline packages to include in the deployed graph. + "gateway_definition": + "description": |- + The definition of a gateway pipeline to support change data capture. + "id": + "description": |- + Unique identifier for this pipeline. + "ingestion_definition": + "description": |- + The configuration for a managed ingestion pipeline. These settings cannot be used with the 'libraries', 'target' or 'catalog' settings. + "libraries": + "description": |- + Libraries or code needed by this deployment. + "name": + "description": |- + Friendly identifier for this pipeline. + "notifications": + "description": |- + List of notification settings for this pipeline. + "photon": + "description": |- + Whether Photon is enabled for this pipeline. + "restart_window": + "description": |- + Restart window of this pipeline. + "schema": + "description": |- + The default schema (database) where tables are read from or published to. The presence of this field implies that the pipeline is in direct publishing mode. + "serverless": + "description": |- + Whether serverless compute is enabled for this pipeline. 
+ "storage": + "description": |- + DBFS root directory for storing checkpoints and tables. + "target": + "description": |- + Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + "trigger": + "description": |- + Which pipeline trigger to use. Deprecated: Use `continuous` instead. github.com/databricks/cli/bundle/config/resources.QualityMonitor: - assets_dir: - description: The directory to store monitoring assets (e.g. dashboard, metric - tables). - baseline_table_name: - description: | + "assets_dir": + "description": |- + The directory to store monitoring assets (e.g. dashboard, metric tables). + "baseline_table_name": + "description": | Name of the baseline table from which drift metrics are computed from. Columns in the monitored table should also be present in the baseline table. - custom_metrics: - description: | + "custom_metrics": + "description": | Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows). - data_classification_config: - description: The data classification config for the monitor. - inference_log: - description: Configuration for monitoring inference logs. - notifications: - description: The notification settings for the monitor. - output_schema_name: - description: Schema where output metric tables are created. - schedule: - description: The schedule for automatically updating and refreshing metric tables. - skip_builtin_dashboard: - description: Whether to skip creating a default dashboard summarizing data quality - metrics. - slicing_exprs: - description: | + "data_classification_config": + "description": |- + The data classification config for the monitor. + "inference_log": + "description": |- + Configuration for monitoring inference logs. + "notifications": + "description": |- + The notification settings for the monitor. + "output_schema_name": + "description": |- + Schema where output metric tables are created. + "schedule": + "description": |- + The schedule for automatically updating and refreshing metric tables. + "skip_builtin_dashboard": + "description": |- + Whether to skip creating a default dashboard summarizing data quality metrics. + "slicing_exprs": + "description": | List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices. - snapshot: - description: Configuration for monitoring snapshot tables. - time_series: - description: Configuration for monitoring time series tables. - warehouse_id: - description: | + "snapshot": + "description": |- + Configuration for monitoring snapshot tables. + "time_series": + "description": |- + Configuration for monitoring time series tables. + "warehouse_id": + "description": | Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used. 
github.com/databricks/cli/bundle/config/resources.RegisteredModel: - catalog_name: - description: The name of the catalog where the schema and the registered model - reside - comment: - description: The comment attached to the registered model - name: - description: The name of the registered model - schema_name: - description: The name of the schema where the registered model resides - storage_location: - description: The storage location on the cloud under which model version data - files are stored + "catalog_name": + "description": |- + The name of the catalog where the schema and the registered model reside + "comment": + "description": |- + The comment attached to the registered model + "name": + "description": |- + The name of the registered model + "schema_name": + "description": |- + The name of the schema where the registered model resides + "storage_location": + "description": |- + The storage location on the cloud under which model version data files are stored github.com/databricks/cli/bundle/config/resources.Schema: - catalog_name: - description: Name of parent catalog. - comment: - description: User-provided free-form text description. - name: - description: Name of schema, relative to parent catalog. - properties: {} - storage_root: - description: Storage root URL for managed tables within schema. + "catalog_name": + "description": |- + Name of parent catalog. + "comment": + "description": |- + User-provided free-form text description. + "name": + "description": |- + Name of schema, relative to parent catalog. + "properties": {} + "storage_root": + "description": |- + Storage root URL for managed tables within schema. github.com/databricks/cli/bundle/config/resources.Volume: - catalog_name: - description: The name of the catalog where the schema and the volume are - comment: - description: The comment attached to the volume - name: - description: The name of the volume - schema_name: - description: The name of the schema where the volume is - storage_location: - description: The storage location on the cloud - volume_type: {} + "catalog_name": + "description": |- + The name of the catalog where the schema and the volume are + "comment": + "description": |- + The comment attached to the volume + "name": + "description": |- + The name of the volume + "schema_name": + "description": |- + The name of the schema where the volume is + "storage_location": + "description": |- + The storage location on the cloud + "volume_type": {} +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + The creation time of the deployment. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user creates the deployment. + "deployment_artifacts": + "description": |- + The deployment artifacts for an app. + "deployment_id": + "description": |- + The unique id of the deployment. + "mode": + "description": |- + The mode of which the deployment will manage the source code. + "source_code_path": + "description": |- + The workspace file system path of the source code used to create the app deployment. This is different from + `deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers + to the original source code location of the app in the workspace during deployment creation, whereas + the latter provides a system generated stable snapshotted source code path used by the deployment. 
+ "status": + "description": |- + Status and status message of the deployment + "update_time": + "description": |- + The update time of the deployment. Formatted timestamp in ISO 6801. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + The snapshotted workspace file system path of the source code loaded by the deployed app. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode: + "_": + "enum": + - |- + SNAPSHOT + - |- + AUTO_SYNC +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState: + "_": + "enum": + - |- + SUCCEEDED + - |- + FAILED + - |- + IN_PROGRESS + - |- + CANCELLED +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + Message corresponding with the deployment state. + "state": + "description": |- + State of the deployment. +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": + "description": |- + Description of the App Resource. + "job": {} + "name": + "description": |- + Name of the App Resource. + "secret": {} + "serving_endpoint": {} + "sql_warehouse": {} +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + Id of the job to grant permission on. + "permission": + "description": |- + Permissions to grant on the Job. Supported permissions are: "CAN_MANAGE", "IS_OWNER", "CAN_MANAGE_RUN", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + IS_OWNER + - |- + CAN_MANAGE_RUN + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + Key of the secret to grant permission on. + "permission": + "description": |- + Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: "READ", "WRITE", "MANAGE". + "scope": + "description": |- + Scope of the secret to grant permission on. +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission: + "_": + "description": |- + Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". + "enum": + - |- + READ + - |- + WRITE + - |- + MANAGE +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + Name of the serving endpoint to grant permission on. + "permission": + "description": |- + Permission to grant on the serving endpoint. Supported permissions are: "CAN_MANAGE", "CAN_QUERY", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_QUERY + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + Id of the SQL warehouse to grant permission on. + "permission": + "description": |- + Permission to grant on the SQL warehouse. Supported permissions are: "CAN_MANAGE", "CAN_USE", "IS_OWNER". 
+github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE + - |- + IS_OWNER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationState: + "_": + "enum": + - |- + DEPLOYING + - |- + RUNNING + - |- + CRASHED + - |- + UNAVAILABLE +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + Application status message + "state": + "description": |- + State of the application. +github.com/databricks/databricks-sdk-go/service/apps.ComputeState: + "_": + "enum": + - |- + ERROR + - |- + DELETING + - |- + STARTING + - |- + STOPPING + - |- + UPDATING + - |- + STOPPED + - |- + ACTIVE +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + Compute status message + "state": + "description": |- + State of the app compute. github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule: - pause_status: - description: Read only field that indicates whether a schedule is paused or not. - quartz_cron_expression: - description: | + "pause_status": + "description": |- + Read only field that indicates whether a schedule is paused or not. + "quartz_cron_expression": + "description": | The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - timezone_id: - description: | + "timezone_id": + "description": | The timezone id (e.g., ``"PST"``) in which to evaluate the quartz expression. github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedulePauseStatus: - _: - description: Read only field that indicates whether a schedule is paused or not. - enum: - - UNPAUSED - - PAUSED + "_": + "description": |- + Read only field that indicates whether a schedule is paused or not. + "enum": + - |- + UNPAUSED + - |- + PAUSED github.com/databricks/databricks-sdk-go/service/catalog.MonitorDataClassificationConfig: - enabled: - description: Whether data classification is enabled. + "enabled": + "description": |- + Whether data classification is enabled. github.com/databricks/databricks-sdk-go/service/catalog.MonitorDestination: - email_addresses: - description: The list of email addresses to send the notification to. A maximum - of 5 email addresses is supported. + "email_addresses": + "description": |- + The list of email addresses to send the notification to. A maximum of 5 email addresses is supported. github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLog: - granularities: - description: | + "granularities": + "description": | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. - label_col: - description: Optional column that contains the ground truth for the prediction. - model_id_col: - description: | + "label_col": + "description": |- + Optional column that contains the ground truth for the prediction. + "model_id_col": + "description": | Column that contains the id of the model generating the predictions. Metrics will be computed per model id by default, and also across all model ids. - prediction_col: - description: Column that contains the output/prediction from the model. 
- prediction_proba_col: - description: | + "prediction_col": + "description": |- + Column that contains the output/prediction from the model. + "prediction_proba_col": + "description": | Optional column that contains the prediction probabilities for each class in a classification problem type. The values in this column should be a map, mapping each class label to the prediction probability for a given sample. The map should be of PySpark MapType(). - problem_type: - description: Problem type the model aims to solve. Determines the type of model-quality - metrics that will be computed. - timestamp_col: - description: | + "problem_type": + "description": |- + Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. + "timestamp_col": + "description": | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). github.com/databricks/databricks-sdk-go/service/catalog.MonitorInferenceLogProblemType: - _: - description: Problem type the model aims to solve. Determines the type of model-quality - metrics that will be computed. - enum: - - PROBLEM_TYPE_CLASSIFICATION - - PROBLEM_TYPE_REGRESSION + "_": + "description": |- + Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed. + "enum": + - |- + PROBLEM_TYPE_CLASSIFICATION + - |- + PROBLEM_TYPE_REGRESSION github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric: - definition: - description: Jinja template for a SQL expression that specifies how to compute - the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition). - input_columns: - description: | + "definition": + "description": |- + Jinja template for a SQL expression that specifies how to compute the metric. See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition). + "input_columns": + "description": | A list of column names in the input table the metric should be computed for. Can use ``":table"`` to indicate that the metric needs information from multiple columns. - name: - description: Name of the metric in the output tables. - output_data_type: - description: The output type of the custom metric. - type: - description: | + "name": + "description": |- + Name of the metric in the output tables. + "output_data_type": + "description": |- + The output type of the custom metric. + "type": + "description": | Can only be one of ``"CUSTOM_METRIC_TYPE_AGGREGATE"``, ``"CUSTOM_METRIC_TYPE_DERIVED"``, or ``"CUSTOM_METRIC_TYPE_DRIFT"``. 
The ``"CUSTOM_METRIC_TYPE_AGGREGATE"`` and ``"CUSTOM_METRIC_TYPE_DERIVED"`` metrics are computed on a single table, whereas the ``"CUSTOM_METRIC_TYPE_DRIFT"`` compare metrics across @@ -516,8 +805,8 @@ github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetric: - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetricType: - _: - description: | + "_": + "description": | Can only be one of ``"CUSTOM_METRIC_TYPE_AGGREGATE"``, ``"CUSTOM_METRIC_TYPE_DERIVED"``, or ``"CUSTOM_METRIC_TYPE_DRIFT"``. The ``"CUSTOM_METRIC_TYPE_AGGREGATE"`` and ``"CUSTOM_METRIC_TYPE_DERIVED"`` metrics are computed on a single table, whereas the ``"CUSTOM_METRIC_TYPE_DRIFT"`` compare metrics across @@ -525,50 +814,57 @@ github.com/databricks/databricks-sdk-go/service/catalog.MonitorMetricType: - CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table - CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics - CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics - enum: - - CUSTOM_METRIC_TYPE_AGGREGATE - - CUSTOM_METRIC_TYPE_DERIVED - - CUSTOM_METRIC_TYPE_DRIFT + "enum": + - |- + CUSTOM_METRIC_TYPE_AGGREGATE + - |- + CUSTOM_METRIC_TYPE_DERIVED + - |- + CUSTOM_METRIC_TYPE_DRIFT github.com/databricks/databricks-sdk-go/service/catalog.MonitorNotifications: - on_failure: - description: Who to send notifications to on monitor failure. - on_new_classification_tag_detected: - description: Who to send notifications to when new data classification tags are - detected. + "on_failure": + "description": |- + Who to send notifications to on monitor failure. + "on_new_classification_tag_detected": + "description": |- + Who to send notifications to when new data classification tags are detected. github.com/databricks/databricks-sdk-go/service/catalog.MonitorSnapshot: {} github.com/databricks/databricks-sdk-go/service/catalog.MonitorTimeSeries: - granularities: - description: | + "granularities": + "description": | Granularities for aggregating data into time windows based on their timestamp. Currently the following static granularities are supported: {``"5 minutes"``, ``"30 minutes"``, ``"1 hour"``, ``"1 day"``, ``" week(s)"``, ``"1 month"``, ``"1 year"``}. - timestamp_col: - description: | + "timestamp_col": + "description": | Column that contains the timestamps of requests. The column must be one of the following: - A ``TimestampType`` column - A column whose values can be converted to timestamps through the pyspark ``to_timestamp`` [function](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.functions.to_timestamp.html). github.com/databricks/databricks-sdk-go/service/catalog.VolumeType: - _: - enum: - - EXTERNAL - - MANAGED + "_": + "enum": + - |- + EXTERNAL + - |- + MANAGED github.com/databricks/databricks-sdk-go/service/compute.Adlsgen2Info: - destination: - description: abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. + "destination": + "description": |- + abfss destination, e.g. `abfss://@.dfs.core.windows.net/`. github.com/databricks/databricks-sdk-go/service/compute.AutoScale: - max_workers: - description: |- + "max_workers": + "description": |- The maximum number of workers to which the cluster can scale up when overloaded. Note that `max_workers` must be strictly greater than `min_workers`. 
- min_workers: - description: |- + "min_workers": + "description": |- The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation. github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: - availability: {} - ebs_volume_count: - description: |- + "availability": {} + "ebs_volume_count": + "description": |- The number of volumes launched for each instance. Users can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. @@ -585,22 +881,20 @@ github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` will be overridden. - ebs_volume_iops: - description: If using gp3 volumes, what IOPS to use for the disk. If this is not - set, the maximum performance of a gp2 volume with the same volume size will - be used. - ebs_volume_size: - description: |- + "ebs_volume_iops": + "description": |- + If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + "ebs_volume_size": + "description": |- The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. - ebs_volume_throughput: - description: If using gp3 volumes, what throughput to use for the disk. If this - is not set, the maximum performance of a gp2 volume with the same volume size - will be used. - ebs_volume_type: {} - first_on_demand: - description: |- + "ebs_volume_throughput": + "description": |- + If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used. + "ebs_volume_type": {} + "first_on_demand": + "description": |- The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node in particular will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all @@ -608,8 +902,8 @@ github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - instance_profile_arn: - description: |- + "instance_profile_arn": + "description": |- Nodes for this cluster will only be placed on AWS instances with this instance profile. If ommitted, nodes will be placed on instances without an IAM instance profile. The instance profile must have previously been added to the Databricks environment by an account @@ -618,8 +912,8 @@ github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: This feature may only be available to certain customer plans. If this field is ommitted, we will pull in the default from the conf if it exists. - spot_bid_price_percent: - description: |- + "spot_bid_price_percent": + "description": |- The bid price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. 
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot @@ -632,8 +926,8 @@ github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: The default value and documentation here should be kept consistent with CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. - zone_id: - description: |- + "zone_id": + "description": |- Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like "us-west-2a". The provided availability zone must be in the same region as the Databricks deployment. For example, "us-west-2a" @@ -644,19 +938,22 @@ github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: The list of available zones as well as the default value can be found by using the `List Zones` method. github.com/databricks/databricks-sdk-go/service/compute.AwsAvailability: - _: - description: | + "_": + "description": | Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. - enum: - - SPOT - - ON_DEMAND - - SPOT_WITH_FALLBACK + "enum": + - |- + SPOT + - |- + ON_DEMAND + - |- + SPOT_WITH_FALLBACK github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes: - availability: {} - first_on_demand: - description: |- + "availability": {} + "first_on_demand": + "description": |- The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This value should be greater than 0, to make sure the cluster driver node is placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all @@ -664,126 +961,131 @@ github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes: size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will be placed on `availability` instances. Note that this value does not affect cluster size and cannot currently be mutated over the lifetime of a cluster. - log_analytics_info: - description: Defines values necessary to configure and run Azure Log Analytics - agent - spot_bid_max_price: - description: |- + "log_analytics_info": + "description": |- + Defines values necessary to configure and run Azure Log Analytics agent + "spot_bid_max_price": + "description": |- The max bid price to be used for Azure spot instances. The Max price for the bid cannot be higher than the on-demand price of the instance. If not specified, the default value is -1, which specifies that the instance cannot be evicted on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. github.com/databricks/databricks-sdk-go/service/compute.AzureAvailability: - _: - description: |- + "_": + "description": |- Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. 
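For illustration only, the Azure attribute fields documented above might be combined in a bundle cluster definition roughly as in the sketch below; the job name, cluster key, Spark version, and node type are placeholders rather than values taken from this schema.

resources:
  jobs:
    example_job:                              # hypothetical job resource name
      job_clusters:
        - job_cluster_key: main
          new_cluster:
            spark_version: 15.4.x-scala2.12   # placeholder DBR release
            node_type_id: Standard_DS3_v2     # placeholder Azure node type
            num_workers: 4
            azure_attributes:
              first_on_demand: 1              # keep the driver on an on-demand instance
              availability: SPOT_WITH_FALLBACK_AZURE
              spot_bid_max_price: -1          # -1: evict on availability only, never on price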
- enum: - - SPOT_AZURE - - ON_DEMAND_AZURE - - SPOT_WITH_FALLBACK_AZURE + "enum": + - |- + SPOT_AZURE + - |- + ON_DEMAND_AZURE + - |- + SPOT_WITH_FALLBACK_AZURE github.com/databricks/databricks-sdk-go/service/compute.ClientsTypes: - jobs: - description: With jobs set, the cluster can be used for jobs - notebooks: - description: With notebooks set, this cluster can be used for notebooks + "jobs": + "description": |- + With jobs set, the cluster can be used for jobs + "notebooks": + "description": |- + With notebooks set, this cluster can be used for notebooks github.com/databricks/databricks-sdk-go/service/compute.ClusterLogConf: - dbfs: - description: |- + "dbfs": + "description": |- destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - s3: - description: |- + "s3": + "description": |- destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: - apply_policy_default_values: - description: When set to true, fixed and default values from the policy will be - used for fields that are omitted. When set to false, only fixed values from - the policy will be applied. - autoscale: - description: |- + "apply_policy_default_values": + "description": |- + When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied. + "autoscale": + "description": |- Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. - autotermination_minutes: - description: |- + "autotermination_minutes": + "description": |- Automatically terminates the cluster after it is inactive for this time in minutes. If not set, this cluster will not be automatically terminated. If specified, the threshold must be between 10 and 10000 minutes. Users can also set this value to 0 to explicitly disable automatic termination. - aws_attributes: - description: |- + "aws_attributes": + "description": |- Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. - azure_attributes: - description: |- + "azure_attributes": + "description": |- Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - cluster_log_conf: - description: |- + "cluster_log_conf": + "description": |- The configuration for delivering spark logs to a long-term storage destination. Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - cluster_name: - description: | + "cluster_name": + "description": | Cluster name requested by the user. This doesn't have to be unique. If not specified at creation, the cluster name will be an empty string. - custom_tags: - description: |- + "custom_tags": + "description": |- Additional tags for cluster resources. 
Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - data_security_mode: {} - docker_image: {} - driver_instance_pool_id: - description: |- + "data_security_mode": {} + "docker_image": {} + "driver_instance_pool_id": + "description": |- The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - driver_node_type_id: - description: | + "driver_node_type_id": + "description": | The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. - enable_elastic_disk: - description: |- + "enable_elastic_disk": + "description": |- Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions to function correctly - refer to the User Guide for more details. - enable_local_disk_encryption: - description: Whether to enable LUKS on cluster VMs' local disks - gcp_attributes: - description: |- + "enable_local_disk_encryption": + "description": |- + Whether to enable LUKS on cluster VMs' local disks + "gcp_attributes": + "description": |- Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. - init_scripts: - description: The configuration for storing init scripts. Any number of destinations - can be specified. The scripts are executed sequentially in the order provided. - If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - instance_pool_id: - description: The optional ID of the instance pool to which the cluster belongs. - is_single_node: - description: | + "init_scripts": + "description": |- + The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + "instance_pool_id": + "description": |- + The optional ID of the instance pool to which the cluster belongs. + "is_single_node": + "description": | This field can only be used with `kind`. When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers` - kind: {} - node_type_id: - description: | + "kind": {} + "node_type_id": + "description": | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - num_workers: - description: |- + "num_workers": + "description": |- Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. 
@@ -792,18 +1094,20 @@ github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - policy_id: - description: The ID of the cluster policy used to create the cluster if applicable. - runtime_engine: {} - single_user_name: - description: Single user name if data_security_mode is `SINGLE_USER` - spark_conf: - description: | + "policy_id": + "description": |- + The ID of the cluster policy used to create the cluster if applicable. + "runtime_engine": {} + "single_user_name": + "description": |- + Single user name if data_security_mode is `SINGLE_USER` + "spark_conf": + "description": | An object containing a set of optional, user-specified Spark configuration key-value pairs. Users can also pass in a string of extra JVM options to the driver and the executors via `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. - spark_env_vars: - description: |- + "spark_env_vars": + "description": |- An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. @@ -815,25 +1119,25 @@ github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec: Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - spark_version: - description: | + "spark_version": + "description": | The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call. - ssh_public_keys: - description: |- + "ssh_public_keys": + "description": |- SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. - use_ml_runtime: - description: | + "use_ml_runtime": + "description": | This field can only be used with `kind`. `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. - workload_type: {} + "workload_type": {} github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode: - _: - description: | + "_": + "description": | Data security mode decides what data governance model to use when accessing data from a cluster. @@ -854,193 +1158,218 @@ github.com/databricks/databricks-sdk-go/service/compute.DataSecurityMode: * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. 
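As a rough sketch under the same assumptions as above, `data_security_mode` and the related `single_user_name` field from the cluster spec might be set together like this; the user email and other values are placeholders.

new_cluster:
  spark_version: 15.4.x-scala2.12        # placeholder DBR release
  node_type_id: i3.xlarge                # placeholder node type
  num_workers: 2
  data_security_mode: SINGLE_USER
  single_user_name: someone@example.com  # only used when data_security_mode is SINGLE_USER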
- enum: - - DATA_SECURITY_MODE_AUTO - - DATA_SECURITY_MODE_STANDARD - - DATA_SECURITY_MODE_DEDICATED - - NONE - - SINGLE_USER - - USER_ISOLATION - - LEGACY_TABLE_ACL - - LEGACY_PASSTHROUGH - - LEGACY_SINGLE_USER - - LEGACY_SINGLE_USER_STANDARD + "enum": + - |- + DATA_SECURITY_MODE_AUTO + - |- + DATA_SECURITY_MODE_STANDARD + - |- + DATA_SECURITY_MODE_DEDICATED + - |- + NONE + - |- + SINGLE_USER + - |- + USER_ISOLATION + - |- + LEGACY_TABLE_ACL + - |- + LEGACY_PASSTHROUGH + - |- + LEGACY_SINGLE_USER + - |- + LEGACY_SINGLE_USER_STANDARD github.com/databricks/databricks-sdk-go/service/compute.DbfsStorageInfo: - destination: - description: dbfs destination, e.g. `dbfs:/my/path` + "destination": + "description": |- + dbfs destination, e.g. `dbfs:/my/path` github.com/databricks/databricks-sdk-go/service/compute.DockerBasicAuth: - password: - description: Password of the user - username: - description: Name of the user + "password": + "description": |- + Password of the user + "username": + "description": |- + Name of the user github.com/databricks/databricks-sdk-go/service/compute.DockerImage: - basic_auth: {} - url: - description: URL of the docker image. + "basic_auth": {} + "url": + "description": |- + URL of the docker image. github.com/databricks/databricks-sdk-go/service/compute.EbsVolumeType: - _: - description: The type of EBS volumes that will be launched with this cluster. - enum: - - GENERAL_PURPOSE_SSD - - THROUGHPUT_OPTIMIZED_HDD + "_": + "description": |- + The type of EBS volumes that will be launched with this cluster. + "enum": + - |- + GENERAL_PURPOSE_SSD + - |- + THROUGHPUT_OPTIMIZED_HDD github.com/databricks/databricks-sdk-go/service/compute.Environment: - _: - description: |- + "_": + "description": |- The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task. In this minimal environment spec, only pip dependencies are supported. - client: - description: |- + "client": + "description": |- Client version used by the environment The client is the user-facing environment of the runtime. Each client comes with a specific set of pre-installed libraries. The version is a string, consisting of the major client version. - dependencies: - description: |- + "dependencies": + "description": |- List of pip dependencies, as supported by the version of pip in this environment. Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/ Allowed dependency could be , , (WSFS or Volumes in Databricks), E.g. dependencies: ["foo==0.0.1", "-r /Workspace/test/requirements.txt"] github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes: - availability: {} - boot_disk_size: - description: boot disk size in GB - google_service_account: - description: |- + "availability": {} + "boot_disk_size": + "description": |- + boot disk size in GB + "google_service_account": + "description": |- If provided, the cluster will impersonate the google service account when accessing gcloud services (like GCS). The google service account must have previously been added to the Databricks environment by an account administrator. - local_ssd_count: - description: If provided, each node (workers and driver) in the cluster will have - this number of local SSDs attached. Each local SSD is 375GB in size. Refer to - [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) - for the supported number of local SSDs for each instance type. 
- use_preemptible_executors: - description: |- + "local_ssd_count": + "description": |- + If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + "use_preemptible_executors": + "description": |- This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon to be deprecated, use the availability field instead. - zone_id: - description: |- + "zone_id": + "description": |- Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones. github.com/databricks/databricks-sdk-go/service/compute.GcpAvailability: - _: - description: |- + "_": + "description": |- This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable. - enum: - - PREEMPTIBLE_GCP - - ON_DEMAND_GCP - - PREEMPTIBLE_WITH_FALLBACK_GCP + "enum": + - |- + PREEMPTIBLE_GCP + - |- + ON_DEMAND_GCP + - |- + PREEMPTIBLE_WITH_FALLBACK_GCP github.com/databricks/databricks-sdk-go/service/compute.GcsStorageInfo: - destination: - description: GCS destination/URI, e.g. `gs://my-bucket/some-prefix` + "destination": + "description": |- + GCS destination/URI, e.g. `gs://my-bucket/some-prefix` github.com/databricks/databricks-sdk-go/service/compute.InitScriptInfo: - abfss: - description: |- + "abfss": + "description": |- destination needs to be provided. e.g. `{ "abfss" : { "destination" : "abfss://@.dfs.core.windows.net/" } } - dbfs: - description: |- + "dbfs": + "description": |- destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` - file: - description: |- + "file": + "description": |- destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } }` - gcs: - description: |- + "gcs": + "description": |- destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }` - s3: - description: |- + "s3": + "description": |- destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to write data to the s3 destination. - volumes: - description: |- + "volumes": + "description": |- destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }` - workspace: - description: |- + "workspace": + "description": |- destination needs to be provided. e.g. `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` github.com/databricks/databricks-sdk-go/service/compute.Library: - cran: - description: Specification of a CRAN library to be installed as part of the library - egg: - description: Deprecated. 
URI of the egg library to install. Installing Python - egg files is deprecated and is not supported in Databricks Runtime 14.0 and - above. - jar: - description: |- + "cran": + "description": |- + Specification of a CRAN library to be installed as part of the library + "egg": + "description": |- + Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above. + "jar": + "description": |- URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or `{ "jar": "s3://my-bucket/library.jar" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. - maven: - description: |- + "maven": + "description": |- Specification of a maven library to be installed. For example: `{ "coordinates": "org.jsoup:jsoup:1.7.2" }` - pypi: - description: |- + "pypi": + "description": |- Specification of a PyPi library to be installed. For example: `{ "package": "simplejson" }` - requirements: - description: |- + "requirements": + "description": |- URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported. For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }` - whl: - description: |- + "whl": + "description": |- URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs. For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or `{ "whl": "s3://my-bucket/library.whl" }`. If S3 is used, please make sure the cluster has read access on the library. You may need to launch the cluster with an IAM role to access the S3 URI. github.com/databricks/databricks-sdk-go/service/compute.LocalFileInfo: - destination: - description: local file destination, e.g. `file:/my/local/file.sh` + "destination": + "description": |- + local file destination, e.g. `file:/my/local/file.sh` github.com/databricks/databricks-sdk-go/service/compute.LogAnalyticsInfo: - log_analytics_primary_key: - description: - log_analytics_workspace_id: - description: + "log_analytics_primary_key": + "description": |- + + "log_analytics_workspace_id": + "description": |- + github.com/databricks/databricks-sdk-go/service/compute.MavenLibrary: - coordinates: - description: 'Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2".' - exclusions: - description: |- + "coordinates": + "description": |- + Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + "exclusions": + "description": |- List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. - repo: - description: |- + "repo": + "description": |- Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. github.com/databricks/databricks-sdk-go/service/compute.PythonPyPiLibrary: - package: - description: |- + "package": + "description": |- The name of the pypi package to install. An optional exact version specification is also supported. Examples: "simplejson" and "simplejson==3.8.0". 
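To make the library fields above concrete, a task-level `libraries` list in a bundle might look roughly like the sketch below; the task key and notebook path are hypothetical, while the package, coordinates, and file paths reuse the illustrative values from the descriptions.

tasks:
  - task_key: etl                                    # hypothetical task key
    notebook_task:
      notebook_path: /Workspace/path/to/etl          # placeholder task definition
    libraries:
      - pypi:
          package: simplejson==3.8.0                 # exact version pin is optional
      - maven:
          coordinates: org.jsoup:jsoup:1.7.2
      - whl: /Workspace/path/to/library.whl
      - requirements: /Volumes/path/to/requirements.txt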
- repo: - description: |- + "repo": + "description": |- The repository where the package can be found. If not specified, the default pip index is used. github.com/databricks/databricks-sdk-go/service/compute.RCranLibrary: - package: - description: The name of the CRAN package to install. - repo: - description: The repository where the package can be found. If not specified, - the default CRAN repo is used. + "package": + "description": |- + The name of the CRAN package to install. + "repo": + "description": |- + The repository where the package can be found. If not specified, the default CRAN repo is used. github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine: - _: - description: | + "_": + "description": | Determines the cluster's runtime engine, either standard or Photon. This field is not compatible with legacy `spark_version` values that contain `-photon-`. @@ -1048,13 +1377,16 @@ github.com/databricks/databricks-sdk-go/service/compute.RuntimeEngine: If left unspecified, the runtime engine defaults to standard unless the spark_version contains -photon-, in which case Photon will be used. - enum: - - "NULL" - - STANDARD - - PHOTON + "enum": + - |- + NULL + - |- + STANDARD + - |- + PHOTON github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo: - canned_acl: - description: |- + "canned_acl": + "description": |- (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on the destination bucket and prefix. The full list of possible canned acl can be found at @@ -1062,329 +1394,352 @@ github.com/databricks/databricks-sdk-go/service/compute.S3StorageInfo: Please also note that by default only the object owner gets full controls. If you are using cross account role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to read the logs. - destination: - description: |- + "destination": + "description": |- S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the destination. Please also note that you cannot use AWS keys to deliver logs. - enable_encryption: - description: (Optional) Flag to enable server side encryption, `false` by default. - encryption_type: - description: |- + "enable_encryption": + "description": |- + (Optional) Flag to enable server side encryption, `false` by default. + "encryption_type": + "description": |- (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when encryption is enabled and the default type is `sse-s3`. - endpoint: - description: |- + "endpoint": + "description": |- S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. If both are set, endpoint will be used. - kms_key: - description: (Optional) Kms key which will be used if encryption is enabled and - encryption type is set to `sse-kms`. - region: - description: |- + "kms_key": + "description": |- + (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + "region": + "description": |- S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, endpoint will be used. github.com/databricks/databricks-sdk-go/service/compute.VolumesStorageInfo: - destination: - description: Unity Catalog Volumes file destination, e.g. 
`/Volumes/my-init.sh` + "destination": + "description": |- + Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh` github.com/databricks/databricks-sdk-go/service/compute.WorkloadType: - clients: - description: ' defined what type of clients can use the cluster. E.g. Notebooks, - Jobs' + "clients": + "description": |2- + defined what type of clients can use the cluster. E.g. Notebooks, Jobs github.com/databricks/databricks-sdk-go/service/compute.WorkspaceStorageInfo: - destination: - description: workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + "destination": + "description": |- + workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` github.com/databricks/databricks-sdk-go/service/dashboards.LifecycleState: - _: - enum: - - ACTIVE - - TRASHED + "_": + "enum": + - |- + ACTIVE + - |- + TRASHED github.com/databricks/databricks-sdk-go/service/jobs.CleanRoomsNotebookTask: - clean_room_name: - description: The clean room that the notebook belongs to. - etag: - description: |- + "clean_room_name": + "description": |- + The clean room that the notebook belongs to. + "etag": + "description": |- Checksum to validate the freshness of the notebook resource (i.e. the notebook being run is the latest version). It can be fetched by calling the :method:cleanroomassets/get API. - notebook_base_parameters: - description: Base parameters to be used for the clean room notebook job. - notebook_name: - description: Name of the notebook being run. + "notebook_base_parameters": + "description": |- + Base parameters to be used for the clean room notebook job. + "notebook_name": + "description": |- + Name of the notebook being run. github.com/databricks/databricks-sdk-go/service/jobs.Condition: - _: - enum: - - ANY_UPDATED - - ALL_UPDATED + "_": + "enum": + - |- + ANY_UPDATED + - |- + ALL_UPDATED github.com/databricks/databricks-sdk-go/service/jobs.ConditionTask: - left: - description: The left operand of the condition task. Can be either a string value - or a job state or parameter reference. - op: - description: |- + "left": + "description": |- + The left operand of the condition task. Can be either a string value or a job state or parameter reference. + "op": + "description": |- * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. - right: - description: The right operand of the condition task. Can be either a string value - or a job state or parameter reference. + "right": + "description": |- + The right operand of the condition task. Can be either a string value or a job state or parameter reference. github.com/databricks/databricks-sdk-go/service/jobs.ConditionTaskOp: - _: - description: |- + "_": + "description": |- * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. 
`“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. - enum: - - EQUAL_TO - - GREATER_THAN - - GREATER_THAN_OR_EQUAL - - LESS_THAN - - LESS_THAN_OR_EQUAL - - NOT_EQUAL + "enum": + - |- + EQUAL_TO + - |- + GREATER_THAN + - |- + GREATER_THAN_OR_EQUAL + - |- + LESS_THAN + - |- + LESS_THAN_OR_EQUAL + - |- + NOT_EQUAL github.com/databricks/databricks-sdk-go/service/jobs.Continuous: - pause_status: - description: Indicate whether the continuous execution of the job is paused or - not. Defaults to UNPAUSED. + "pause_status": + "description": |- + Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED. github.com/databricks/databricks-sdk-go/service/jobs.CronSchedule: - pause_status: - description: Indicate whether this schedule is paused or not. - quartz_cron_expression: - description: A Cron expression using Quartz syntax that describes the schedule - for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) - for details. This field is required. - timezone_id: - description: A Java timezone ID. The schedule for a job is resolved with respect - to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) - for details. This field is required. + "pause_status": + "description": |- + Indicate whether this schedule is paused or not. + "quartz_cron_expression": + "description": |- + A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required. + "timezone_id": + "description": |- + A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required. github.com/databricks/databricks-sdk-go/service/jobs.DbtTask: - catalog: - description: Optional name of the catalog to use. The value is the top level in - the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog - value can only be specified if a warehouse_id is specified. Requires dbt-databricks - >= 1.1.1. - commands: - description: A list of dbt commands to execute. All commands must start with `dbt`. - This parameter must not be empty. A maximum of up to 10 commands can be provided. - profiles_directory: - description: Optional (relative) path to the profiles directory. Can only be specified - if no warehouse_id is specified. If no warehouse_id is specified and this folder - is unset, the root directory is used. - project_directory: - description: |- + "catalog": + "description": |- + Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. + "commands": + "description": |- + A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. + "profiles_directory": + "description": |- + Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. 
If no warehouse_id is specified and this folder is unset, the root directory is used. + "project_directory": + "description": |- Path to the project directory. Optional for Git sourced tasks, in which case if no value is provided, the root of the Git repository is used. - schema: - description: Optional schema to write to. This parameter is only used when a warehouse_id - is also provided. If not provided, the `default` schema is used. - source: - description: |- + "schema": + "description": |- + Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. + "source": + "description": |- Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Project is located in Databricks workspace. * `GIT`: Project is located in cloud Git provider. - warehouse_id: - description: ID of the SQL warehouse to connect to. If provided, we automatically - generate and provide the profile and connection details to dbt. It can be overridden - on a per-command basis by using the `--profiles-dir` command line argument. + "warehouse_id": + "description": |- + ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. github.com/databricks/databricks-sdk-go/service/jobs.FileArrivalTriggerConfiguration: - min_time_between_triggers_seconds: - description: |- + "min_time_between_triggers_seconds": + "description": |- If set, the trigger starts a run only after the specified amount of time passed since the last time the trigger fired. The minimum allowed value is 60 seconds - url: - description: URL to be monitored for file arrivals. The path must point to the - root or a subpath of the external location. - wait_after_last_change_seconds: - description: |- + "url": + "description": |- + URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + "wait_after_last_change_seconds": + "description": |- If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The minimum allowed value is 60 seconds. github.com/databricks/databricks-sdk-go/service/jobs.ForEachTask: - concurrency: - description: |- + "concurrency": + "description": |- An optional maximum allowed number of concurrent runs of the task. Set this value if you want to be able to execute multiple runs of the task concurrently. - inputs: - description: |- + "inputs": + "description": |- Array for task to iterate on. This can be a JSON string or a reference to an array parameter. 
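A minimal sketch of how `for_each_task` might be wired up, assuming a hypothetical nested task named `process_region`; `inputs` is given here as a JSON array string, and the nested `task` block (described next) defines what runs for each element.

tasks:
  - task_key: fan_out
    for_each_task:
      inputs: '["us-west-2", "eu-west-1"]'   # JSON string, or a reference to an array parameter
      concurrency: 2                         # at most two iterations run at the same time
      task:
        task_key: process_region             # hypothetical nested task
        notebook_task:
          notebook_path: /Workspace/Users/someone@example.com/process_region   # placeholder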
- task: - description: Configuration for the task that will be run for each element in the - array + "task": + "description": |- + Configuration for the task that will be run for each element in the array github.com/databricks/databricks-sdk-go/service/jobs.Format: - _: - enum: - - SINGLE_TASK - - MULTI_TASK + "_": + "enum": + - |- + SINGLE_TASK + - |- + MULTI_TASK github.com/databricks/databricks-sdk-go/service/jobs.GitProvider: - _: - enum: - - gitHub - - bitbucketCloud - - azureDevOpsServices - - gitHubEnterprise - - bitbucketServer - - gitLab - - gitLabEnterpriseEdition - - awsCodeCommit + "_": + "enum": + - |- + gitHub + - |- + bitbucketCloud + - |- + azureDevOpsServices + - |- + gitHubEnterprise + - |- + bitbucketServer + - |- + gitLab + - |- + gitLabEnterpriseEdition + - |- + awsCodeCommit github.com/databricks/databricks-sdk-go/service/jobs.GitSnapshot: - _: - description: Read-only state of the remote repository at the time the job was - run. This field is only included on job runs. - used_commit: - description: Commit that was used to execute the run. If git_branch was specified, - this points to the HEAD of the branch at the time of the run; if git_tag was - specified, this points to the commit the tag points to. + "_": + "description": |- + Read-only state of the remote repository at the time the job was run. This field is only included on job runs. + "used_commit": + "description": |- + Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. github.com/databricks/databricks-sdk-go/service/jobs.GitSource: - _: - description: |- + "_": + "description": |- An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks. If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task. Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job. - git_branch: - description: Name of the branch to be checked out and used by this job. This field - cannot be specified in conjunction with git_tag or git_commit. - git_commit: - description: Commit to be checked out and used by this job. This field cannot - be specified in conjunction with git_branch or git_tag. - git_provider: - description: Unique identifier of the service used to host the Git repository. - The value is case insensitive. - git_snapshot: {} - git_tag: - description: Name of the tag to be checked out and used by this job. This field - cannot be specified in conjunction with git_branch or git_commit. - git_url: - description: URL of the repository to be cloned by this job. - job_source: - description: The source of the job specification in the remote repository when - the job is source controlled. + "git_branch": + "description": |- + Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit. + "git_commit": + "description": |- + Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + "git_provider": + "description": |- + Unique identifier of the service used to host the Git repository. The value is case insensitive. 
+ "git_snapshot": {} + "git_tag": + "description": |- + Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit. + "git_url": + "description": |- + URL of the repository to be cloned by this job. + "job_source": + "description": |- + The source of the job specification in the remote repository when the job is source controlled. github.com/databricks/databricks-sdk-go/service/jobs.JobCluster: - job_cluster_key: - description: |- + "job_cluster_key": + "description": |- A unique name for the job cluster. This field is required and must be unique within the job. `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. - new_cluster: - description: If new_cluster, a description of a cluster that is created for each - task. + "new_cluster": + "description": |- + If new_cluster, a description of a cluster that is created for each task. github.com/databricks/databricks-sdk-go/service/jobs.JobDeployment: - kind: - description: |- + "kind": + "description": |- The kind of deployment that manages the job. * `BUNDLE`: The job is managed by Databricks Asset Bundle. - metadata_file_path: - description: Path of the file that contains deployment metadata. + "metadata_file_path": + "description": |- + Path of the file that contains deployment metadata. github.com/databricks/databricks-sdk-go/service/jobs.JobDeploymentKind: - _: - description: '* `BUNDLE`: The job is managed by Databricks Asset Bundle.' - enum: - - BUNDLE + "_": + "description": |- + * `BUNDLE`: The job is managed by Databricks Asset Bundle. + "enum": + - |- + BUNDLE github.com/databricks/databricks-sdk-go/service/jobs.JobEditMode: - _: - description: |- + "_": + "description": |- Edit mode of the job. * `UI_LOCKED`: The job is in a locked UI state and cannot be modified. * `EDITABLE`: The job is in an editable state and can be modified. - enum: - - UI_LOCKED - - EDITABLE + "enum": + - |- + UI_LOCKED + - |- + EDITABLE github.com/databricks/databricks-sdk-go/service/jobs.JobEmailNotifications: - no_alert_for_skipped_runs: - description: |- + "no_alert_for_skipped_runs": + "description": |- If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - on_duration_warning_threshold_exceeded: - description: A list of email addresses to be notified when the duration of a run - exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the - `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified - in the `health` field for the job, notifications are not sent. - on_failure: - description: A list of email addresses to be notified when a run unsuccessfully - completes. A run is considered to have completed unsuccessfully if it ends with - an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. - If this is not specified on job creation, reset, or update the list is empty, - and notifications are not sent. - on_start: - description: A list of email addresses to be notified when a run begins. If not - specified on job creation, reset, or update, the list is empty, and notifications - are not sent. 
- on_streaming_backlog_exceeded: - description: |- + "on_duration_warning_threshold_exceeded": + "description": |- + A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + "on_failure": + "description": |- + A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + "on_start": + "description": |- + A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + "on_streaming_backlog_exceeded": + "description": |- A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - on_success: - description: A list of email addresses to be notified when a run successfully - completes. A run is considered to have completed successfully if it ends with - a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified - on job creation, reset, or update, the list is empty, and notifications are - not sent. + "on_success": + "description": |- + A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment: - environment_key: - description: The key of an environment. It has to be unique within a job. - spec: {} + "environment_key": + "description": |- + The key of an environment. It has to be unique within a job. + "spec": {} github.com/databricks/databricks-sdk-go/service/jobs.JobNotificationSettings: - no_alert_for_canceled_runs: - description: If true, do not send notifications to recipients specified in `on_failure` - if the run is canceled. - no_alert_for_skipped_runs: - description: If true, do not send notifications to recipients specified in `on_failure` - if the run is skipped. + "no_alert_for_canceled_runs": + "description": |- + If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + "no_alert_for_skipped_runs": + "description": |- + If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. github.com/databricks/databricks-sdk-go/service/jobs.JobParameterDefinition: - default: - description: Default value of the parameter. - name: - description: The name of the defined parameter. May only contain alphanumeric - characters, `_`, `-`, and `.` + "default": + "description": |- + Default value of the parameter. + "name": + "description": |- + The name of the defined parameter. 
May only contain alphanumeric characters, `_`, `-`, and `.` github.com/databricks/databricks-sdk-go/service/jobs.JobRunAs: - _: - description: |- + "_": + "description": |- Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job. Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown. - service_principal_name: - description: Application ID of an active service principal. Setting this field - requires the `servicePrincipal/user` role. - user_name: - description: The email of an active workspace user. Non-admin users can only set - this field to their own email. + "service_principal_name": + "description": |- + Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + "user_name": + "description": |- + The email of an active workspace user. Non-admin users can only set this field to their own email. github.com/databricks/databricks-sdk-go/service/jobs.JobSource: - _: - description: The source of the job specification in the remote repository when - the job is source controlled. - dirty_state: - description: |- + "_": + "description": |- + The source of the job specification in the remote repository when the job is source controlled. + "dirty_state": + "description": |- Dirty state indicates the job is not fully synced with the job specification in the remote repository. Possible values are: * `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced. * `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced. - import_from_git_branch: - description: Name of the branch which the job is imported from. - job_config_path: - description: Path of the job YAML file that contains the job specification. + "import_from_git_branch": + "description": |- + Name of the branch which the job is imported from. + "job_config_path": + "description": |- + Path of the job YAML file that contains the job specification. github.com/databricks/databricks-sdk-go/service/jobs.JobSourceDirtyState: - _: - description: |- + "_": + "description": |- Dirty state indicates the job is not fully synced with the job specification in the remote repository. Possible values are: * `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced. * `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced. - enum: - - NOT_SYNCED - - DISCONNECTED + "enum": + - |- + NOT_SYNCED + - |- + DISCONNECTED github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric: - _: - description: |- + "_": + "description": |- Specifies the health metric that is being evaluated for a particular health rule. * `RUN_DURATION_SECONDS`: Expected total time for a run in seconds. @@ -1392,31 +1747,38 @@ github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthMetric: * `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview. * `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview. 
* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview. - enum: - - RUN_DURATION_SECONDS - - STREAMING_BACKLOG_BYTES - - STREAMING_BACKLOG_RECORDS - - STREAMING_BACKLOG_SECONDS - - STREAMING_BACKLOG_FILES + "enum": + - |- + RUN_DURATION_SECONDS + - |- + STREAMING_BACKLOG_BYTES + - |- + STREAMING_BACKLOG_RECORDS + - |- + STREAMING_BACKLOG_SECONDS + - |- + STREAMING_BACKLOG_FILES github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthOperator: - _: - description: Specifies the operator used to compare the health metric value with - the specified threshold. - enum: - - GREATER_THAN + "_": + "description": |- + Specifies the operator used to compare the health metric value with the specified threshold. + "enum": + - |- + GREATER_THAN github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule: - metric: {} - op: {} - value: - description: Specifies the threshold value that the health metric should obey - to satisfy the health rule. + "metric": {} + "op": {} + "value": + "description": |- + Specifies the threshold value that the health metric should obey to satisfy the health rule. github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules: - _: - description: An optional set of health rules that can be defined for this job. - rules: {} + "_": + "description": |- + An optional set of health rules that can be defined for this job. + "rules": {} github.com/databricks/databricks-sdk-go/service/jobs.NotebookTask: - base_parameters: - description: |- + "base_parameters": + "description": |- Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run Now with parameters specified, the two parameters maps are merged. If the same key is specified in `base_parameters` and in `run-now`, the value from `run-now` is used. @@ -1428,65 +1790,76 @@ github.com/databricks/databricks-sdk-go/service/jobs.NotebookTask: Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). The JSON representation of this field cannot exceed 1MB. - notebook_path: - description: |- + "notebook_path": + "description": |- The path of the notebook to be run in the Databricks workspace or remote repository. For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. For notebooks stored in a remote repository, the path must be relative. This field is required. - source: - description: |- + "source": + "description": |- Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: Notebook is located in Databricks workspace. * `GIT`: Notebook is located in cloud Git provider. - warehouse_id: - description: |- + "warehouse_id": + "description": |- Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses. Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail. 
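Putting the notebook task fields together, a notebook task in a bundle job might be sketched as below; the path, parameter values, and warehouse ID are placeholders.

tasks:
  - task_key: report
    notebook_task:
      notebook_path: /Workspace/Users/someone@example.com/daily_report   # workspace paths must be absolute
      source: WORKSPACE
      base_parameters:
        name: john doe                    # overridden by run-now parameters with the same key
        age: "35"
      warehouse_id: 1234567890abcdef      # optional; serverless or pro SQL warehouses only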
github.com/databricks/databricks-sdk-go/service/jobs.PauseStatus: - _: - enum: - - UNPAUSED - - PAUSED + "_": + "enum": + - |- + UNPAUSED + - |- + PAUSED github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfiguration: - interval: - description: The interval at which the trigger should run. - unit: - description: The unit of time for the interval. + "interval": + "description": |- + The interval at which the trigger should run. + "unit": + "description": |- + The unit of time for the interval. github.com/databricks/databricks-sdk-go/service/jobs.PeriodicTriggerConfigurationTimeUnit: - _: - enum: - - HOURS - - DAYS - - WEEKS + "_": + "enum": + - |- + HOURS + - |- + DAYS + - |- + WEEKS github.com/databricks/databricks-sdk-go/service/jobs.PipelineParams: - full_refresh: - description: If true, triggers a full refresh on the delta live table. + "full_refresh": + "description": |- + If true, triggers a full refresh on the delta live table. github.com/databricks/databricks-sdk-go/service/jobs.PipelineTask: - full_refresh: - description: If true, triggers a full refresh on the delta live table. - pipeline_id: - description: The full name of the pipeline task to execute. + "full_refresh": + "description": |- + If true, triggers a full refresh on the delta live table. + "pipeline_id": + "description": |- + The full name of the pipeline task to execute. github.com/databricks/databricks-sdk-go/service/jobs.PythonWheelTask: - entry_point: - description: Named entry point to use, if it does not exist in the metadata of - the package it executes the function from the package directly using `$packageName.$entryPoint()` - named_parameters: - description: Command-line parameters passed to Python wheel task in the form of - `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` - is not null. - package_name: - description: Name of the package to execute - parameters: - description: Command-line parameters passed to Python wheel task. Leave it empty - if `named_parameters` is not null. + "entry_point": + "description": |- + Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` + "named_parameters": + "description": |- + Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + "package_name": + "description": |- + Name of the package to execute + "parameters": + "description": |- + Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. github.com/databricks/databricks-sdk-go/service/jobs.QueueSettings: - enabled: - description: If true, enable queueing for the job. This is a required field. + "enabled": + "description": |- + If true, enable queueing for the job. This is a required field. github.com/databricks/databricks-sdk-go/service/jobs.RunIf: - _: - description: |- + "_": + "description": |- An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`. 
Possible values are: @@ -1496,20 +1869,25 @@ github.com/databricks/databricks-sdk-go/service/jobs.RunIf: * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: ALl dependencies have failed - enum: - - ALL_SUCCESS - - ALL_DONE - - NONE_FAILED - - AT_LEAST_ONE_SUCCESS - - ALL_FAILED - - AT_LEAST_ONE_FAILED + "enum": + - |- + ALL_SUCCESS + - |- + ALL_DONE + - |- + NONE_FAILED + - |- + AT_LEAST_ONE_SUCCESS + - |- + ALL_FAILED + - |- + AT_LEAST_ONE_FAILED github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: - dbt_commands: - description: 'An array of commands to execute for jobs with the dbt task, for - example `"dbt_commands": ["dbt deps", "dbt seed", "dbt deps", "dbt seed", "dbt - run"]`' - jar_params: - description: |- + "dbt_commands": + "description": |- + An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt deps", "dbt seed", "dbt run"]` + "jar_params": + "description": |- A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. The parameters are used to invoke the main function of the main class specified in the Spark JAR task. If not specified upon `run-now`, it defaults to an empty list. @@ -1517,12 +1895,14 @@ github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: The JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - job_id: - description: ID of the job to trigger. - job_parameters: - description: Job-level parameters used to trigger the job. - notebook_params: - description: |- + "job_id": + "description": |- + ID of the job to trigger. + "job_parameters": + "description": |- + Job-level parameters used to trigger the job. + "notebook_params": + "description": |- A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function. @@ -1533,11 +1913,12 @@ github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. The JSON representation of this field (for example `{"notebook_params":{"name":"john doe","age":"35"}}`) cannot exceed 10,000 bytes. - pipeline_params: - description: Controls whether the pipeline should perform a full refresh - python_named_params: {} - python_params: - description: |- + "pipeline_params": + "description": |- + Controls whether the pipeline should perform a full refresh + "python_named_params": {} + "python_params": + "description": |- A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) @@ -1549,8 +1930,8 @@ github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. 
Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. - spark_submit_params: - description: |- + "spark_submit_params": + "description": |- A list of parameters for jobs with spark submit task, for example `"spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`) @@ -1562,50 +1943,48 @@ github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask: These parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error. Examples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis. - sql_params: - description: 'A map from keys to values for jobs with SQL task, for example `"sql_params": - {"name": "john doe", "age": "35"}`. The SQL alert task does not support custom - parameters.' + "sql_params": + "description": |- + A map from keys to values for jobs with SQL task, for example `"sql_params": {"name": "john doe", "age": "35"}`. The SQL alert task does not support custom parameters. github.com/databricks/databricks-sdk-go/service/jobs.Source: - _: - description: |- + "_": + "description": |- Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\ from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. - enum: - - WORKSPACE - - GIT + "enum": + - |- + WORKSPACE + - |- + GIT github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask: - jar_uri: - description: Deprecated since 04/2016. Provide a `jar` through the `libraries` - field instead. For an example, see :method:jobs/create. - main_class_name: - description: |- + "jar_uri": + "description": |- + Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create. + "main_class_name": + "description": |- The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. - parameters: - description: |- + "parameters": + "description": |- Parameters passed to the main method. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask: - parameters: - description: |- + "parameters": + "description": |- Command line parameters passed to the Python file. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. - python_file: - description: The Python file to be executed. Cloud file URIs (such as dbfs:/, - s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored - in the Databricks workspace, the path must be absolute and begin with `/`. For - files stored in a remote repository, the path must be relative. This field is - required. 
- source: - description: |- + "python_file": + "description": |- + The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + "source": + "description": |- Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, the Python file will be retrieved from a Git repository defined in `git_source`. @@ -1613,52 +1992,59 @@ github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask: * `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI. * `GIT`: The Python file is located in a remote Git repository. github.com/databricks/databricks-sdk-go/service/jobs.SparkSubmitTask: - parameters: - description: |- + "parameters": + "description": |- Command-line parameters passed to spark submit. Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. github.com/databricks/databricks-sdk-go/service/jobs.SqlTask: - alert: - description: If alert, indicates that this job must refresh a SQL alert. - dashboard: - description: If dashboard, indicates that this job must refresh a SQL dashboard. - file: - description: If file, indicates that this job runs a SQL file in a remote Git - repository. - parameters: - description: Parameters to be used for each run of this job. The SQL alert task - does not support custom parameters. - query: - description: If query, indicates that this job must execute a SQL query. - warehouse_id: - description: The canonical identifier of the SQL warehouse. Recommended to use - with serverless or pro SQL warehouses. Classic SQL warehouses are only supported - for SQL alert, dashboard and query tasks and are limited to scheduled single-task - jobs. + "alert": + "description": |- + If alert, indicates that this job must refresh a SQL alert. + "dashboard": + "description": |- + If dashboard, indicates that this job must refresh a SQL dashboard. + "file": + "description": |- + If file, indicates that this job runs a SQL file in a remote Git repository. + "parameters": + "description": |- + Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. + "query": + "description": |- + If query, indicates that this job must execute a SQL query. + "warehouse_id": + "description": |- + The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs. github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskAlert: - alert_id: - description: The canonical identifier of the SQL alert. - pause_subscriptions: - description: If true, the alert notifications are not sent to subscribers. - subscriptions: - description: If specified, alert notifications are sent to subscribers. + "alert_id": + "description": |- + The canonical identifier of the SQL alert. + "pause_subscriptions": + "description": |- + If true, the alert notifications are not sent to subscribers. 
+ "subscriptions": + "description": |- + If specified, alert notifications are sent to subscribers. github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskDashboard: - custom_subject: - description: Subject of the email sent to subscribers of this task. - dashboard_id: - description: The canonical identifier of the SQL dashboard. - pause_subscriptions: - description: If true, the dashboard snapshot is not taken, and emails are not - sent to subscribers. - subscriptions: - description: If specified, dashboard snapshots are sent to subscriptions. + "custom_subject": + "description": |- + Subject of the email sent to subscribers of this task. + "dashboard_id": + "description": |- + The canonical identifier of the SQL dashboard. + "pause_subscriptions": + "description": |- + If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + "subscriptions": + "description": |- + If specified, dashboard snapshots are sent to subscriptions. github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskFile: - path: - description: Path of the SQL file. Must be relative if the source is a remote - Git repository and absolute for workspace paths. - source: - description: |- + "path": + "description": |- + Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths. + "source": + "description": |- Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise. @@ -1666,107 +2052,104 @@ github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskFile: * `WORKSPACE`: SQL file is located in Databricks workspace. * `GIT`: SQL file is located in cloud Git provider. github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskQuery: - query_id: - description: The canonical identifier of the SQL query. + "query_id": + "description": |- + The canonical identifier of the SQL query. github.com/databricks/databricks-sdk-go/service/jobs.SqlTaskSubscription: - destination_id: - description: The canonical identifier of the destination to receive email notification. - This parameter is mutually exclusive with user_name. You cannot set both destination_id - and user_name for subscription notifications. - user_name: - description: The user name to receive the subscription email. This parameter is - mutually exclusive with destination_id. You cannot set both destination_id and - user_name for subscription notifications. + "destination_id": + "description": |- + The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications. + "user_name": + "description": |- + The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications. github.com/databricks/databricks-sdk-go/service/jobs.TableUpdateTriggerConfiguration: - condition: - description: The table(s) condition based on which to trigger a job run. - min_time_between_triggers_seconds: - description: |- + "condition": + "description": |- + The table(s) condition based on which to trigger a job run. 
+ "min_time_between_triggers_seconds": + "description": |- If set, the trigger starts a run only after the specified amount of time has passed since the last time the trigger fired. The minimum allowed value is 60 seconds. - table_names: - description: A list of Delta tables to monitor for changes. The table name must - be in the format `catalog_name.schema_name.table_name`. - wait_after_last_change_seconds: - description: |- + "table_names": + "description": |- + A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`. + "wait_after_last_change_seconds": + "description": |- If set, the trigger starts a run only after no table updates have occurred for the specified time and can be used to wait for a series of table updates before triggering a run. The minimum allowed value is 60 seconds. github.com/databricks/databricks-sdk-go/service/jobs.Task: - clean_rooms_notebook_task: - description: |- + "clean_rooms_notebook_task": + "description": |- The task runs a [clean rooms](https://docs.databricks.com/en/clean-rooms/index.html) notebook when the `clean_rooms_notebook_task` field is present. - condition_task: - description: |- + "condition_task": + "description": |- The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present. The condition task does not require a cluster to execute and does not support retries or notifications. - dbt_task: - description: The task runs one or more dbt commands when the `dbt_task` field - is present. The dbt task requires both Databricks SQL and the ability to use - a serverless or a pro SQL warehouse. - depends_on: - description: |- + "dbt_task": + "description": |- + The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. + "depends_on": + "description": |- An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true. The key is `task_key`, and the value is the name assigned to the dependent task. - description: - description: An optional description for this task. - disable_auto_optimization: - description: An option to disable auto optimization in serverless - email_notifications: - description: An optional set of email addresses that is notified when runs of - this task begin or complete as well as when this task is deleted. The default - behavior is to not send any emails. - environment_key: - description: The key that references an environment spec in a job. This field - is required for Python script, Python wheel and dbt tasks when using serverless - compute. - existing_cluster_id: - description: |- + "description": + "description": |- + An optional description for this task. + "disable_auto_optimization": + "description": |- + An option to disable auto optimization in serverless + "email_notifications": + "description": |- + An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. + "environment_key": + "description": |- + The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute. 
+ "existing_cluster_id": + "description": |- If existing_cluster_id, the ID of an existing cluster that is used for all runs. When running jobs or tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs and tasks on new clusters for greater reliability - for_each_task: - description: The task executes a nested task for every input provided when the - `for_each_task` field is present. - health: {} - job_cluster_key: - description: If job_cluster_key, this task is executed reusing the cluster specified - in `job.settings.job_clusters`. - libraries: - description: |- + "for_each_task": + "description": |- + The task executes a nested task for every input provided when the `for_each_task` field is present. + "health": {} + "job_cluster_key": + "description": |- + If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. + "libraries": + "description": |- An optional list of libraries to be installed on the cluster. The default value is an empty list. - max_retries: - description: An optional maximum number of times to retry an unsuccessful run. - A run is considered to be unsuccessful if it completes with the `FAILED` result_state - or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely - and the value `0` means to never retry. - min_retry_interval_millis: - description: An optional minimal interval in milliseconds between the start of - the failed run and the subsequent retry run. The default behavior is that unsuccessful - runs are immediately retried. - new_cluster: - description: If new_cluster, a description of a new cluster that is created for - each run. - notebook_task: - description: The task runs a notebook when the `notebook_task` field is present. - notification_settings: - description: Optional notification settings that are used when sending notifications - to each of the `email_notifications` and `webhook_notifications` for this task. - pipeline_task: - description: The task triggers a pipeline update when the `pipeline_task` field - is present. Only pipelines configured to use triggered more are supported. - python_wheel_task: - description: The task runs a Python wheel when the `python_wheel_task` field is - present. - retry_on_timeout: - description: |- + "max_retries": + "description": |- + An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry. + "min_retry_interval_millis": + "description": |- + An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. + "new_cluster": + "description": |- + If new_cluster, a description of a new cluster that is created for each run. + "notebook_task": + "description": |- + The task runs a notebook when the `notebook_task` field is present. + "notification_settings": + "description": |- + Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task. + "pipeline_task": + "description": |- + The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered more are supported. 
+ "python_wheel_task": + "description": |- + The task runs a Python wheel when the `python_wheel_task` field is present. + "retry_on_timeout": + "description": |- An optional policy to specify whether to retry a job when it times out. The default behavior is to not retry on timeout. - run_if: - description: |- + "run_if": + "description": |- An optional value specifying the condition determining whether the task is run once its dependencies have been completed. * `ALL_SUCCESS`: All dependencies have executed and succeeded @@ -1775,15 +2158,17 @@ github.com/databricks/databricks-sdk-go/service/jobs.Task: * `ALL_DONE`: All dependencies have been completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: ALl dependencies have failed - run_job_task: - description: The task triggers another job when the `run_job_task` field is present. - spark_jar_task: - description: The task runs a JAR when the `spark_jar_task` field is present. - spark_python_task: - description: The task runs a Python file when the `spark_python_task` field is - present. - spark_submit_task: - description: |- + "run_job_task": + "description": |- + The task triggers another job when the `run_job_task` field is present. + "spark_jar_task": + "description": |- + The task runs a JAR when the `spark_jar_task` field is present. + "spark_python_task": + "description": |- + The task runs a Python file when the `spark_python_task` field is present. + "spark_submit_task": + "description": |- (Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute. In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations. @@ -1793,220 +2178,258 @@ github.com/databricks/databricks-sdk-go/service/jobs.Task: By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory`, and `--executor-memory` to a smaller value to leave some room for off-heap usage. The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths. - sql_task: - description: The task runs a SQL query or file, or it refreshes a SQL alert or - a legacy SQL dashboard when the `sql_task` field is present. - task_key: - description: |- + "sql_task": + "description": |- + The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present. + "task_key": + "description": |- A unique name for the task. This field is used to refer to this task from other tasks. This field is required and must be unique within its parent job. On Update or Reset, this field is used to reference the tasks to be updated or reset. - timeout_seconds: - description: An optional timeout applied to each run of this job task. A value - of `0` means no timeout. - webhook_notifications: - description: A collection of system notification IDs to notify when runs of this - task begin or complete. The default behavior is to not send any system notifications. + "timeout_seconds": + "description": |- + An optional timeout applied to each run of this job task. A value of `0` means no timeout. + "webhook_notifications": + "description": |- + A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications. 
github.com/databricks/databricks-sdk-go/service/jobs.TaskDependency: - outcome: - description: Can only be specified on condition task dependencies. The outcome - of the dependent task that must be met for this task to run. - task_key: - description: The name of the task this task depends on. + "outcome": + "description": |- + Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. + "task_key": + "description": |- + The name of the task this task depends on. github.com/databricks/databricks-sdk-go/service/jobs.TaskEmailNotifications: - no_alert_for_skipped_runs: - description: |- + "no_alert_for_skipped_runs": + "description": |- If true, do not send email to recipients specified in `on_failure` if the run is skipped. This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field. - on_duration_warning_threshold_exceeded: - description: A list of email addresses to be notified when the duration of a run - exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the - `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified - in the `health` field for the job, notifications are not sent. - on_failure: - description: A list of email addresses to be notified when a run unsuccessfully - completes. A run is considered to have completed unsuccessfully if it ends with - an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. - If this is not specified on job creation, reset, or update the list is empty, - and notifications are not sent. - on_start: - description: A list of email addresses to be notified when a run begins. If not - specified on job creation, reset, or update, the list is empty, and notifications - are not sent. - on_streaming_backlog_exceeded: - description: |- + "on_duration_warning_threshold_exceeded": + "description": |- + A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent. + "on_failure": + "description": |- + A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + "on_start": + "description": |- + A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + "on_streaming_backlog_exceeded": + "description": |- A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. - on_success: - description: A list of email addresses to be notified when a run successfully - completes. A run is considered to have completed successfully if it ends with - a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. 
If not specified - on job creation, reset, or update, the list is empty, and notifications are - not sent. + "on_success": + "description": |- + A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. github.com/databricks/databricks-sdk-go/service/jobs.TaskNotificationSettings: - alert_on_last_attempt: - description: If true, do not send notifications to recipients specified in `on_start` - for the retried runs and do not send notifications to recipients specified in - `on_failure` until the last retry of the run. - no_alert_for_canceled_runs: - description: If true, do not send notifications to recipients specified in `on_failure` - if the run is canceled. - no_alert_for_skipped_runs: - description: If true, do not send notifications to recipients specified in `on_failure` - if the run is skipped. + "alert_on_last_attempt": + "description": |- + If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. + "no_alert_for_canceled_runs": + "description": |- + If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + "no_alert_for_skipped_runs": + "description": |- + If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings: - file_arrival: - description: File arrival trigger settings. - pause_status: - description: Whether this trigger is paused or not. - periodic: - description: Periodic trigger settings. - table: - description: Old table trigger settings name. Deprecated in favor of `table_update`. - table_update: {} + "file_arrival": + "description": |- + File arrival trigger settings. + "pause_status": + "description": |- + Whether this trigger is paused or not. + "periodic": + "description": |- + Periodic trigger settings. + "table": + "description": |- + Old table trigger settings name. Deprecated in favor of `table_update`. + "table_update": {} github.com/databricks/databricks-sdk-go/service/jobs.Webhook: - id: {} + "id": {} github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications: - on_duration_warning_threshold_exceeded: - description: An optional list of system notification IDs to call when the duration - of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric - in the `health` field. A maximum of 3 destinations can be specified for the - `on_duration_warning_threshold_exceeded` property. - on_failure: - description: An optional list of system notification IDs to call when the run - fails. A maximum of 3 destinations can be specified for the `on_failure` property. - on_start: - description: An optional list of system notification IDs to call when the run - starts. A maximum of 3 destinations can be specified for the `on_start` property. - on_streaming_backlog_exceeded: - description: |- + "on_duration_warning_threshold_exceeded": + "description": |- + An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. 
A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property. + "on_failure": + "description": |- + An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + "on_start": + "description": |- + An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + "on_streaming_backlog_exceeded": + "description": |- An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream. Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`. Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes. A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property. - on_success: - description: An optional list of system notification IDs to call when the run - completes successfully. A maximum of 3 destinations can be specified for the - `on_success` property. + "on_success": + "description": |- + An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag: - key: - description: The tag key. - value: - description: The tag value. + "key": + "description": |- + The tag key. + "value": + "description": |- + The tag value. github.com/databricks/databricks-sdk-go/service/ml.ModelTag: - key: - description: The tag key. - value: - description: The tag value. + "key": + "description": |- + The tag key. + "value": + "description": |- + The tag value. github.com/databricks/databricks-sdk-go/service/ml.ModelVersion: - creation_timestamp: - description: Timestamp recorded when this `model_version` was created. - current_stage: - description: Current stage for this `model_version`. - description: - description: Description of this `model_version`. - last_updated_timestamp: - description: Timestamp recorded when metadata for this `model_version` was last - updated. - name: - description: Unique name of the model - run_id: - description: |- + "creation_timestamp": + "description": |- + Timestamp recorded when this `model_version` was created. + "current_stage": + "description": |- + Current stage for this `model_version`. + "description": + "description": |- + Description of this `model_version`. + "last_updated_timestamp": + "description": |- + Timestamp recorded when metadata for this `model_version` was last updated. + "name": + "description": |- + Unique name of the model + "run_id": + "description": |- MLflow run ID used when creating `model_version`, if `source` was generated by an experiment run stored in MLflow tracking server. - run_link: - description: 'Run Link: Direct link to the run that generated this version' - source: - description: URI indicating the location of the source model artifacts, used when - creating `model_version` - status: - description: Current status of `model_version` - status_message: - description: Details on current `status`, if it is pending or failed. - tags: - description: 'Tags: Additional metadata key-value pairs for this `model_version`.' - user_id: - description: User that created this `model_version`. 
- version: - description: Model's version number. + "run_link": + "description": |- + Run Link: Direct link to the run that generated this version + "source": + "description": |- + URI indicating the location of the source model artifacts, used when creating `model_version` + "status": + "description": |- + Current status of `model_version` + "status_message": + "description": |- + Details on current `status`, if it is pending or failed. + "tags": + "description": |- + Tags: Additional metadata key-value pairs for this `model_version`. + "user_id": + "description": |- + User that created this `model_version`. + "version": + "description": |- + Model's version number. github.com/databricks/databricks-sdk-go/service/ml.ModelVersionStatus: - _: - description: Current status of `model_version` - enum: - - PENDING_REGISTRATION - - FAILED_REGISTRATION - - READY + "_": + "description": |- + Current status of `model_version` + "enum": + - |- + PENDING_REGISTRATION + - |- + FAILED_REGISTRATION + - |- + READY github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag: - key: - description: The tag key. - value: - description: The tag value. + "key": + "description": |- + The tag key. + "value": + "description": |- + The tag value. github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: - quartz_cron_schedule: {} - timezone_id: {} + "quartz_cron_schedule": {} + "timezone_id": {} +github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek: + "_": + "description": |- + Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). + If not specified all days of the week will be used. + "enum": + - |- + MONDAY + - |- + TUESDAY + - |- + WEDNESDAY + - |- + THURSDAY + - |- + FRIDAY + - |- + SATURDAY + - |- + SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind: - _: - description: | + "_": + "description": | The deployment method that manages the pipeline: - BUNDLE: The pipeline is managed by a Databricks Asset Bundle. - enum: - - BUNDLE + "enum": + - |- + BUNDLE github.com/databricks/databricks-sdk-go/service/pipelines.FileLibrary: - path: - description: The absolute path of the file. + "path": + "description": |- + The absolute path of the file. github.com/databricks/databricks-sdk-go/service/pipelines.Filters: - exclude: - description: Paths to exclude. - include: - description: Paths to include. + "exclude": + "description": |- + Paths to exclude. + "include": + "description": |- + Paths to include. github.com/databricks/databricks-sdk-go/service/pipelines.IngestionConfig: - report: - description: Select a specific source report. - schema: - description: Select all tables from a specific source schema. - table: - description: Select a specific source table. + "report": + "description": |- + Select a specific source report. + "schema": + "description": |- + Select all tables from a specific source schema. + "table": + "description": |- + Select a specific source table. github.com/databricks/databricks-sdk-go/service/pipelines.IngestionGatewayPipelineDefinition: - connection_id: - description: '[Deprecated, use connection_name instead] Immutable. The Unity Catalog - connection that this gateway pipeline uses to communicate with the source.' - connection_name: - description: Immutable. The Unity Catalog connection that this gateway pipeline - uses to communicate with the source. - gateway_storage_catalog: - description: Required, Immutable. 
The name of the catalog for the gateway pipeline's - storage location. - gateway_storage_name: - description: | + "connection_id": + "description": |- + [Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + "connection_name": + "description": |- + Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source. + "gateway_storage_catalog": + "description": |- + Required, Immutable. The name of the catalog for the gateway pipeline's storage location. + "gateway_storage_name": + "description": | Optional. The Unity Catalog-compatible name for the gateway storage location. This is the destination to use for the data that is extracted by the gateway. Delta Live Tables system will automatically create the storage location under the catalog and schema. - gateway_storage_schema: - description: Required, Immutable. The name of the schema for the gateway pipelines's - storage location. + "gateway_storage_schema": + "description": |- + Required, Immutable. The name of the schema for the gateway pipelines's storage location. github.com/databricks/databricks-sdk-go/service/pipelines.IngestionPipelineDefinition: - connection_name: - description: Immutable. The Unity Catalog connection that this ingestion pipeline - uses to communicate with the source. This is used with connectors for applications - like Salesforce, Workday, and so on. - ingestion_gateway_id: - description: Immutable. Identifier for the gateway that is used by this ingestion - pipeline to communicate with the source database. This is used with connectors - to databases like SQL Server. - objects: - description: Required. Settings specifying tables to replicate and the destination - for the replicated tables. - table_configuration: - description: Configuration settings to control the ingestion of tables. These - settings are applied to all tables in the pipeline. + "connection_name": + "description": |- + Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on. + "ingestion_gateway_id": + "description": |- + Immutable. Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server. + "objects": + "description": |- + Required. Settings specifying tables to replicate and the destination for the replicated tables. + "table_configuration": + "description": |- + Configuration settings to control the ingestion of tables. These settings are applied to all tables in the pipeline. github.com/databricks/databricks-sdk-go/service/pipelines.ManualTrigger: {} github.com/databricks/databricks-sdk-go/service/pipelines.NotebookLibrary: - path: - description: The absolute path of the notebook. + "path": + "description": |- + The absolute path of the notebook. github.com/databricks/databricks-sdk-go/service/pipelines.Notifications: - alerts: - description: | + "alerts": + "description": | A list of alerts that trigger the sending of notifications to the configured destinations. The supported alerts are: @@ -2014,74 +2437,74 @@ github.com/databricks/databricks-sdk-go/service/pipelines.Notifications: * `on-update-failure`: Each time a pipeline update fails. * `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error. * `on-flow-failure`: A single data flow fails. 
- email_recipients: - description: | + "email_recipients": + "description": | A list of email addresses notified when a configured alert is triggered. github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster: - apply_policy_default_values: - description: 'Note: This field won''t be persisted. Only API users will check - this field.' - autoscale: - description: |- + "apply_policy_default_values": + "description": |- + Note: This field won't be persisted. Only API users will check this field. + "autoscale": + "description": |- Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later. - aws_attributes: - description: |- + "aws_attributes": + "description": |- Attributes related to clusters running on Amazon Web Services. If not specified at cluster creation, a set of default values will be used. - azure_attributes: - description: |- + "azure_attributes": + "description": |- Attributes related to clusters running on Microsoft Azure. If not specified at cluster creation, a set of default values will be used. - cluster_log_conf: - description: | + "cluster_log_conf": + "description": | The configuration for delivering spark logs to a long-term storage destination. Only dbfs destinations are supported. Only one destination can be specified for one cluster. If the conf is given, the logs will be delivered to the destination every `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while the destination of executor logs is `$destination/$clusterId/executor`. - custom_tags: - description: |- + "custom_tags": + "description": |- Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS instances and EBS volumes) with these tags in addition to `default_tags`. Notes: - Currently, Databricks allows at most 45 custom tags - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags - driver_instance_pool_id: - description: |- + "driver_instance_pool_id": + "description": |- The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. - driver_node_type_id: - description: |- + "driver_node_type_id": + "description": |- The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. - enable_local_disk_encryption: - description: Whether to enable local disk encryption for the cluster. - gcp_attributes: - description: |- + "enable_local_disk_encryption": + "description": |- + Whether to enable local disk encryption for the cluster. + "gcp_attributes": + "description": |- Attributes related to clusters running on Google Cloud Platform. If not specified at cluster creation, a set of default values will be used. - init_scripts: - description: The configuration for storing init scripts. Any number of destinations - can be specified. The scripts are executed sequentially in the order provided. - If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. - instance_pool_id: - description: The optional ID of the instance pool to which the cluster belongs. - label: - description: A label for the cluster specification, either `default` to configure - the default cluster, or `maintenance` to configure the maintenance cluster. - This field is optional. 
The default value is `default`. - node_type_id: - description: | + "init_scripts": + "description": |- + The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + "instance_pool_id": + "description": |- + The optional ID of the instance pool to which the cluster belongs. + "label": + "description": |- + A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + "node_type_id": + "description": | This field encodes, through a single value, the resources available to each of the Spark nodes in this cluster. For example, the Spark nodes can be provisioned and optimized for memory or compute intensive workloads. A list of available node types can be retrieved by using the :method:clusters/listNodeTypes API call. - num_workers: - description: |- + "num_workers": + "description": |- Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. @@ -2090,14 +2513,15 @@ github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster: is resized from 5 to 10 workers, this field will immediately be updated to reflect the target size of 10 workers, whereas the workers listed in `spark_info` will gradually increase from 5 to 10 as the new nodes are provisioned. - policy_id: - description: The ID of the cluster policy used to create the cluster if applicable. - spark_conf: - description: | + "policy_id": + "description": |- + The ID of the cluster policy used to create the cluster if applicable. + "spark_conf": + "description": | An object containing a set of optional, user-specified Spark configuration key-value pairs. See :method:clusters/create for more details. - spark_env_vars: - description: |- + "spark_env_vars": + "description": |- An object containing a set of optional, user-specified environment variable key-value pairs. Please note that key-value pair of the form (X,Y) will be exported as is (i.e., `export X='Y'`) while launching the driver and workers. @@ -2109,523 +2533,618 @@ github.com/databricks/databricks-sdk-go/service/pipelines.PipelineCluster: Example Spark environment variables: `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` - ssh_public_keys: - description: |- + "ssh_public_keys": + "description": |- SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. Up to 10 keys can be specified. github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscale: - max_workers: - description: The maximum number of workers to which the cluster can scale up when - overloaded. `max_workers` must be strictly greater than `min_workers`. - min_workers: - description: |- + "max_workers": + "description": |- + The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`. + "min_workers": + "description": |- The minimum number of workers the cluster can scale down to when underutilized. 
It is also the initial number of workers the cluster will have after creation. - mode: - description: | + "mode": + "description": | Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. github.com/databricks/databricks-sdk-go/service/pipelines.PipelineClusterAutoscaleMode: - _: - description: | + "_": + "description": | Databricks Enhanced Autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact to the data processing latency of your pipelines. Enhanced Autoscaling is available for `updates` clusters only. The legacy autoscaling feature is used for `maintenance` clusters. - enum: - - ENHANCED - - LEGACY + "enum": + - |- + ENHANCED + - |- + LEGACY github.com/databricks/databricks-sdk-go/service/pipelines.PipelineDeployment: - kind: - description: The deployment method that manages the pipeline. - metadata_file_path: - description: The path to the file containing metadata about the deployment. + "kind": + "description": |- + The deployment method that manages the pipeline. + "metadata_file_path": + "description": |- + The path to the file containing metadata about the deployment. github.com/databricks/databricks-sdk-go/service/pipelines.PipelineLibrary: - file: - description: | + "file": + "description": | The path to a file that defines a pipeline and is stored in the Databricks Repos. - jar: - description: | + "jar": + "description": | URI of the jar to be installed. Currently only DBFS is supported. - maven: - description: | + "maven": + "description": | Specification of a maven library to be installed. - notebook: - description: | + "notebook": + "description": | The path to a notebook that defines a pipeline and is stored in the Databricks workspace. - whl: - description: URI of the whl to be installed. + "whl": + "description": |- + URI of the whl to be installed. github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger: - cron: {} - manual: {} + "cron": {} + "manual": {} github.com/databricks/databricks-sdk-go/service/pipelines.ReportSpec: - destination_catalog: - description: Required. Destination catalog to store table. - destination_schema: - description: Required. Destination schema to store table. - destination_table: - description: Required. Destination table name. The pipeline fails if a table with - that name already exists. - source_url: - description: Required. Report URL in the source system. - table_configuration: - description: Configuration settings to control the ingestion of tables. These - settings override the table_configuration defined in the IngestionPipelineDefinition - object. + "destination_catalog": + "description": |- + Required. Destination catalog to store table. + "destination_schema": + "description": |- + Required. Destination schema to store table. + "destination_table": + "description": |- + Required. Destination table name. The pipeline fails if a table with that name already exists. + "source_url": + "description": |- + Required. Report URL in the source system. + "table_configuration": + "description": |- + Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object. 
github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow: - days_of_week: - description: |- + "days_of_week": + "description": |- Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). If not specified all days of the week will be used. - start_hour: - description: |- + "start_hour": + "description": |- An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day. Continuous pipeline restart is triggered only within a five-hour window starting at this hour. - time_zone_id: - description: |- + "time_zone_id": + "description": |- Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. -github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek: - _: - description: |- - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). - If not specified all days of the week will be used. - enum: - - MONDAY - - TUESDAY - - WEDNESDAY - - THURSDAY - - FRIDAY - - SATURDAY - - SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec: - destination_catalog: - description: Required. Destination catalog to store tables. - destination_schema: - description: Required. Destination schema to store tables in. Tables with the - same name as the source tables are created in this destination schema. The pipeline - fails If a table with the same name already exists. - source_catalog: - description: The source catalog name. Might be optional depending on the type - of source. - source_schema: - description: Required. Schema name in the source database. - table_configuration: - description: Configuration settings to control the ingestion of tables. These - settings are applied to all tables in this schema and override the table_configuration - defined in the IngestionPipelineDefinition object. + "destination_catalog": + "description": |- + Required. Destination catalog to store tables. + "destination_schema": + "description": |- + Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists. + "source_catalog": + "description": |- + The source catalog name. Might be optional depending on the type of source. + "source_schema": + "description": |- + Required. Schema name in the source database. + "table_configuration": + "description": |- + Configuration settings to control the ingestion of tables. These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object. github.com/databricks/databricks-sdk-go/service/pipelines.TableSpec: - destination_catalog: - description: Required. Destination catalog to store table. - destination_schema: - description: Required. Destination schema to store table. - destination_table: - description: Optional. Destination table name. The pipeline fails if a table with - that name already exists. If not set, the source table name is used. - source_catalog: - description: Source catalog name. Might be optional depending on the type of source. - source_schema: - description: Schema name in the source database. Might be optional depending on - the type of source. - source_table: - description: Required. Table name in the source database. 
- table_configuration: - description: Configuration settings to control the ingestion of tables. These - settings override the table_configuration defined in the IngestionPipelineDefinition - object and the SchemaSpec. + "destination_catalog": + "description": |- + Required. Destination catalog to store table. + "destination_schema": + "description": |- + Required. Destination schema to store table. + "destination_table": + "description": |- + Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used. + "source_catalog": + "description": |- + Source catalog name. Might be optional depending on the type of source. + "source_schema": + "description": |- + Schema name in the source database. Might be optional depending on the type of source. + "source_table": + "description": |- + Required. Table name in the source database. + "table_configuration": + "description": |- + Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec. github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfig: - primary_keys: - description: The primary key of the table used to apply changes. - salesforce_include_formula_fields: - description: If true, formula fields defined in the table are included in the - ingestion. This setting is only valid for the Salesforce connector - scd_type: - description: The SCD type to use to ingest the table. - sequence_by: - description: The column names specifying the logical order of events in the source - data. Delta Live Tables uses this sequencing to handle change events that arrive - out of order. + "primary_keys": + "description": |- + The primary key of the table used to apply changes. + "salesforce_include_formula_fields": + "description": |- + If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector + "scd_type": + "description": |- + The SCD type to use to ingest the table. + "sequence_by": + "description": |- + The column names specifying the logical order of events in the source data. Delta Live Tables uses this sequencing to handle change events that arrive out of order. github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScdType: - _: - description: The SCD type to use to ingest the table. - enum: - - SCD_TYPE_1 - - SCD_TYPE_2 + "_": + "description": |- + The SCD type to use to ingest the table. + "enum": + - |- + SCD_TYPE_1 + - |- + SCD_TYPE_2 +github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig: + "ai21labs_api_key": + "description": |- + The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. + "ai21labs_api_key_plaintext": + "description": |- + An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayConfig: - guardrails: - description: Configuration for AI Guardrails to prevent unwanted data and unsafe - data in requests and responses. 
- inference_table_config: - description: Configuration for payload logging using inference tables. Use these - tables to monitor and audit data being sent to and received from model APIs - and to improve model quality. - rate_limits: - description: Configuration for rate limits which can be set to limit endpoint - traffic. - usage_tracking_config: - description: Configuration to enable usage tracking using system tables. These - tables allow you to monitor operational usage on endpoints and their associated - costs. + "guardrails": + "description": |- + Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. + "inference_table_config": + "description": |- + Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality. + "rate_limits": + "description": |- + Configuration for rate limits which can be set to limit endpoint traffic. + "usage_tracking_config": + "description": |- + Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParameters: - invalid_keywords: - description: List of invalid keywords. AI guardrail uses keyword or string matching - to decide if the keyword exists in the request or response content. - pii: - description: Configuration for guardrail PII filter. - safety: - description: Indicates whether the safety filter is enabled. - valid_topics: - description: The list of allowed topics. Given a chat request, this guardrail - flags the request if its topic is not in the allowed topics. + "invalid_keywords": + "description": |- + List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content. + "pii": + "description": |- + Configuration for guardrail PII filter. + "safety": + "description": |- + Indicates whether the safety filter is enabled. + "valid_topics": + "description": |- + The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehavior: - behavior: - description: Behavior for PII filter. Currently only 'BLOCK' is supported. If - 'BLOCK' is set for the input guardrail and the request contains PII, the request - is not sent to the model server and 400 status code is returned; if 'BLOCK' - is set for the output guardrail and the model response contains PII, the PII - info in the response is redacted and 400 status code is returned. + "behavior": + "description": |- + Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehaviorBehavior: - _: - description: Behavior for PII filter. Currently only 'BLOCK' is supported. 
If - 'BLOCK' is set for the input guardrail and the request contains PII, the request - is not sent to the model server and 400 status code is returned; if 'BLOCK' - is set for the output guardrail and the model response contains PII, the PII - info in the response is redacted and 400 status code is returned. - enum: - - NONE - - BLOCK + "_": + "description": |- + Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned. + "enum": + - |- + NONE + - |- + BLOCK github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrails: - input: - description: Configuration for input guardrail filters. - output: - description: Configuration for output guardrail filters. + "input": + "description": |- + Configuration for input guardrail filters. + "output": + "description": |- + Configuration for output guardrail filters. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayInferenceTableConfig: - catalog_name: - description: 'The name of the catalog in Unity Catalog. Required when enabling - inference tables. NOTE: On update, you have to disable inference table first - in order to change the catalog name.' - enabled: - description: Indicates whether the inference table is enabled. - schema_name: - description: 'The name of the schema in Unity Catalog. Required when enabling - inference tables. NOTE: On update, you have to disable inference table first - in order to change the schema name.' - table_name_prefix: - description: 'The prefix of the table in Unity Catalog. NOTE: On update, you have - to disable inference table first in order to change the prefix name.' + "catalog_name": + "description": |- + The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name. + "enabled": + "description": |- + Indicates whether the inference table is enabled. + "schema_name": + "description": |- + The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name. + "table_name_prefix": + "description": |- + The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name. github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimit: - calls: - description: Used to specify how many calls are allowed for a key within the renewal_period. - key: - description: Key field for a rate limit. Currently, only 'user' and 'endpoint' - are supported, with 'endpoint' being the default if not specified. - renewal_period: - description: Renewal period field for a rate limit. Currently, only 'minute' is - supported. + "calls": + "description": |- + Used to specify how many calls are allowed for a key within the renewal_period. + "key": + "description": |- + Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + "renewal_period": + "description": |- + Renewal period field for a rate limit. Currently, only 'minute' is supported. 
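Editor's aside, not part of the diff: the AI Gateway annotations above correspond to the serving package's gateway config types. A minimal sketch of combining a per-user rate limit with usage tracking follows; the generated constant names (AiGatewayRateLimitKeyUser, AiGatewayRateLimitRenewalPeriodMinute) are assumed from the SDK's usual enum naming rather than taken from this diff.

package main

import (
	"fmt"

	"github.com/databricks/databricks-sdk-go/service/serving"
)

func main() {
	// Rate limits and usage tracking as documented in the annotations above.
	// Constant names are assumptions based on the SDK's generated naming.
	gateway := serving.AiGatewayConfig{
		RateLimits: []serving.AiGatewayRateLimit{
			{
				Calls:         100,                                           // calls allowed per renewal_period
				Key:           serving.AiGatewayRateLimitKeyUser,             // 'endpoint' is the default if unset
				RenewalPeriod: serving.AiGatewayRateLimitRenewalPeriodMinute, // only 'minute' is supported
			},
		},
		UsageTrackingConfig: &serving.AiGatewayUsageTrackingConfig{Enabled: true},
	}
	fmt.Printf("%+v\n", gateway)
}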
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey: - _: - description: Key field for a rate limit. Currently, only 'user' and 'endpoint' - are supported, with 'endpoint' being the default if not specified. - enum: - - user - - endpoint + "_": + "description": |- + Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + "enum": + - |- + user + - |- + endpoint github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitRenewalPeriod: - _: - description: Renewal period field for a rate limit. Currently, only 'minute' is - supported. - enum: - - minute + "_": + "description": |- + Renewal period field for a rate limit. Currently, only 'minute' is supported. + "enum": + - |- + minute github.com/databricks/databricks-sdk-go/service/serving.AiGatewayUsageTrackingConfig: - enabled: - description: Whether to enable usage tracking. + "enabled": + "description": |- + Whether to enable usage tracking. github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig: - aws_access_key_id: - description: 'The Databricks secret key reference for an AWS access key ID with - permissions to interact with Bedrock services. If you prefer to paste your API - key directly, see `aws_access_key_id`. You must provide an API key using one - of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.' - aws_access_key_id_plaintext: - description: 'An AWS access key ID with permissions to interact with Bedrock services - provided as a plaintext string. If you prefer to reference your key using Databricks - Secrets, see `aws_access_key_id`. You must provide an API key using one of the - following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.' - aws_region: - description: The AWS region to use. Bedrock has to be enabled there. - aws_secret_access_key: - description: 'The Databricks secret key reference for an AWS secret access key - paired with the access key ID, with permissions to interact with Bedrock services. - If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. - You must provide an API key using one of the following fields: `aws_secret_access_key` - or `aws_secret_access_key_plaintext`.' - aws_secret_access_key_plaintext: - description: 'An AWS secret access key paired with the access key ID, with permissions - to interact with Bedrock services provided as a plaintext string. If you prefer - to reference your key using Databricks Secrets, see `aws_secret_access_key`. - You must provide an API key using one of the following fields: `aws_secret_access_key` - or `aws_secret_access_key_plaintext`.' - bedrock_provider: - description: 'The underlying provider in Amazon Bedrock. Supported values (case - insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.' + "aws_access_key_id": + "description": |- + The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + "aws_access_key_id_plaintext": + "description": |- + An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. 
You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`. + "aws_region": + "description": |- + The AWS region to use. Bedrock has to be enabled there. + "aws_secret_access_key": + "description": |- + The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + "aws_secret_access_key_plaintext": + "description": |- + An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`. + "bedrock_provider": + "description": |- + The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider: - _: - description: 'The underlying provider in Amazon Bedrock. Supported values (case - insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.' - enum: - - anthropic - - cohere - - ai21labs - - amazon + "_": + "description": |- + The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. + "enum": + - |- + anthropic + - |- + cohere + - |- + ai21labs + - |- + amazon github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig: - anthropic_api_key: - description: 'The Databricks secret key reference for an Anthropic API key. If - you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. - You must provide an API key using one of the following fields: `anthropic_api_key` - or `anthropic_api_key_plaintext`.' - anthropic_api_key_plaintext: - description: 'The Anthropic API key provided as a plaintext string. If you prefer - to reference your key using Databricks Secrets, see `anthropic_api_key`. You - must provide an API key using one of the following fields: `anthropic_api_key` - or `anthropic_api_key_plaintext`.' + "anthropic_api_key": + "description": |- + The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. + "anthropic_api_key_plaintext": + "description": |- + The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput: - catalog_name: - description: 'The name of the catalog in Unity Catalog. NOTE: On update, you cannot - change the catalog name if the inference table is already enabled.' - enabled: - description: Indicates whether the inference table is enabled. - schema_name: - description: 'The name of the schema in Unity Catalog. NOTE: On update, you cannot - change the schema name if the inference table is already enabled.' 
- table_name_prefix: - description: 'The prefix of the table in Unity Catalog. NOTE: On update, you cannot - change the prefix name if the inference table is already enabled.' + "catalog_name": + "description": |- + The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled. + "enabled": + "description": |- + Indicates whether the inference table is enabled. + "schema_name": + "description": |- + The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled. + "table_name_prefix": + "description": |- + The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled. github.com/databricks/databricks-sdk-go/service/serving.CohereConfig: - cohere_api_base: - description: "This is an optional field to provide a customized base URL for the - Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" - cohere_api_key: - description: 'The Databricks secret key reference for a Cohere API key. If you - prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must - provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.' - cohere_api_key_plaintext: - description: 'The Cohere API key provided as a plaintext string. If you prefer - to reference your key using Databricks Secrets, see `cohere_api_key`. You must - provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.' + "cohere_api_base": + "description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n" + "cohere_api_key": + "description": |- + The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. + "cohere_api_key_plaintext": + "description": |- + The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig: - databricks_api_token: - description: | + "databricks_api_token": + "description": | The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model. If you prefer to paste your API key directly, see `databricks_api_token_plaintext`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. - databricks_api_token_plaintext: - description: | + "databricks_api_token_plaintext": + "description": | The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`. You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`. 
- databricks_workspace_url: - description: | + "databricks_workspace_url": + "description": | The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model. github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput: - auto_capture_config: - description: Configuration for Inference Tables which automatically logs requests - and responses to Unity Catalog. - served_entities: - description: A list of served entities for the endpoint to serve. A serving endpoint - can have up to 15 served entities. - served_models: - description: (Deprecated, use served_entities instead) A list of served models - for the endpoint to serve. A serving endpoint can have up to 15 served models. - traffic_config: - description: The traffic config defining how invocations to the serving endpoint - should be routed. + "auto_capture_config": + "description": |- + Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog. + "served_entities": + "description": |- + A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities. + "served_models": + "description": |- + (Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models. + "traffic_config": + "description": |- + The traffic config defining how invocations to the serving endpoint should be routed. github.com/databricks/databricks-sdk-go/service/serving.EndpointTag: - key: - description: Key field for a serving endpoint tag. - value: - description: Optional value field for a serving endpoint tag. + "key": + "description": |- + Key field for a serving endpoint tag. + "value": + "description": |- + Optional value field for a serving endpoint tag. github.com/databricks/databricks-sdk-go/service/serving.ExternalModel: - ai21labs_config: - description: AI21Labs Config. Only required if the provider is 'ai21labs'. - amazon_bedrock_config: - description: Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. - anthropic_config: - description: Anthropic Config. Only required if the provider is 'anthropic'. - cohere_config: - description: Cohere Config. Only required if the provider is 'cohere'. - databricks_model_serving_config: - description: Databricks Model Serving Config. Only required if the provider is - 'databricks-model-serving'. - google_cloud_vertex_ai_config: - description: Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'. - name: - description: The name of the external model. - openai_config: - description: OpenAI Config. Only required if the provider is 'openai'. - palm_config: - description: PaLM Config. Only required if the provider is 'palm'. - provider: - description: | + "ai21labs_config": + "description": |- + AI21Labs Config. Only required if the provider is 'ai21labs'. + "amazon_bedrock_config": + "description": |- + Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. + "anthropic_config": + "description": |- + Anthropic Config. Only required if the provider is 'anthropic'. + "cohere_config": + "description": |- + Cohere Config. Only required if the provider is 'cohere'. + "databricks_model_serving_config": + "description": |- + Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'. + "google_cloud_vertex_ai_config": + "description": |- + Google Cloud Vertex AI Config. 
Only required if the provider is 'google-cloud-vertex-ai'. + "name": + "description": |- + The name of the external model. + "openai_config": + "description": |- + OpenAI Config. Only required if the provider is 'openai'. + "palm_config": + "description": |- + PaLM Config. Only required if the provider is 'palm'. + "provider": + "description": | The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.", - task: - description: The task type of the external model. + "task": + "description": |- + The task type of the external model. github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider: - _: - description: | + "_": + "description": | The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.", - enum: - - ai21labs - - anthropic - - amazon-bedrock - - cohere - - databricks-model-serving - - google-cloud-vertex-ai - - openai - - palm + "enum": + - |- + ai21labs + - |- + anthropic + - |- + amazon-bedrock + - |- + cohere + - |- + databricks-model-serving + - |- + google-cloud-vertex-ai + - |- + openai + - |- + palm +github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig: + "private_key": + "description": |- + The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext` + "private_key_plaintext": + "description": |- + The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`. + "project_id": + "description": |- + This is the Google Cloud project id that the service account is associated with. + "region": + "description": |- + This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions. +github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig: + "microsoft_entra_client_id": + "description": | + This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID. + "microsoft_entra_client_secret": + "description": | + The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication. + If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`. + You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. 
+ "microsoft_entra_client_secret_plaintext": + "description": | + The client secret used for Microsoft Entra ID authentication provided as a plaintext string. + If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`. + You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`. + "microsoft_entra_tenant_id": + "description": | + This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID. + "openai_api_base": + "description": | + This is a field to provide a customized base URl for the OpenAI API. + For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service + provided by Azure. + For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used. + "openai_api_key": + "description": |- + The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + "openai_api_key_plaintext": + "description": |- + The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`. + "openai_api_type": + "description": | + This is an optional field to specify the type of OpenAI API to use. + For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security + access validation protocol. For access token validation, use azure. For authentication using Azure Active + Directory (Azure AD) use, azuread. + "openai_api_version": + "description": | + This is an optional field to specify the OpenAI API version. + For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to + utilize, specified by a date. + "openai_deployment_name": + "description": | + This field is only required for Azure OpenAI and is the name of the deployment resource for the + Azure OpenAI service. + "openai_organization": + "description": | + This is an optional field to specify the organization in OpenAI or Azure OpenAI. +github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig: + "palm_api_key": + "description": |- + The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. + "palm_api_key_plaintext": + "description": |- + The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`. github.com/databricks/databricks-sdk-go/service/serving.RateLimit: - calls: - description: Used to specify how many calls are allowed for a key within the renewal_period. - key: - description: Key field for a serving endpoint rate limit. Currently, only 'user' - and 'endpoint' are supported, with 'endpoint' being the default if not specified. - renewal_period: - description: Renewal period field for a serving endpoint rate limit. Currently, - only 'minute' is supported. 
+ "calls": + "description": |- + Used to specify how many calls are allowed for a key within the renewal_period. + "key": + "description": |- + Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + "renewal_period": + "description": |- + Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey: - _: - description: Key field for a serving endpoint rate limit. Currently, only 'user' - and 'endpoint' are supported, with 'endpoint' being the default if not specified. - enum: - - user - - endpoint + "_": + "description": |- + Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified. + "enum": + - |- + user + - |- + endpoint github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod: - _: - description: Renewal period field for a serving endpoint rate limit. Currently, - only 'minute' is supported. - enum: - - minute + "_": + "description": |- + Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported. + "enum": + - |- + minute github.com/databricks/databricks-sdk-go/service/serving.Route: - served_model_name: - description: The name of the served model this route configures traffic for. - traffic_percentage: - description: The percentage of endpoint traffic to send to this route. It must - be an integer between 0 and 100 inclusive. + "served_model_name": + "description": |- + The name of the served model this route configures traffic for. + "traffic_percentage": + "description": |- + The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive. github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput: - entity_name: - description: | + "entity_name": + "description": | The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of __catalog_name__.__schema_name__.__model_name__. - entity_version: - description: The version of the model in Databricks Model Registry to be served - or empty if the entity is a FEATURE_SPEC. - environment_vars: - description: "An object containing a set of optional, user-specified environment - variable key-value pairs used for serving this entity.\nNote: this is an experimental - feature and subject to change. \nExample entity environment variables that refer - to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", - \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" - external_model: - description: | + "entity_version": + "description": |- + The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC. + "environment_vars": + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" + "external_model": + "description": | The external model to be served. 
NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same. - instance_profile_arn: - description: ARN of the instance profile that the served entity uses to access - AWS resources. - max_provisioned_throughput: - description: The maximum tokens per second that the endpoint can scale up to. - min_provisioned_throughput: - description: The minimum tokens per second that the endpoint can scale down to. - name: - description: | + "instance_profile_arn": + "description": |- + ARN of the instance profile that the served entity uses to access AWS resources. + "max_provisioned_throughput": + "description": |- + The maximum tokens per second that the endpoint can scale up to. + "min_provisioned_throughput": + "description": |- + The minimum tokens per second that the endpoint can scale down to. + "name": + "description": | The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_to_zero_enabled: - description: Whether the compute resources for the served entity should scale - down to zero. - workload_size: - description: | + "scale_to_zero_enabled": + "description": |- + Whether the compute resources for the served entity should scale down to zero. + "workload_size": + "description": | The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_type: - description: | + "workload_type": + "description": | The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput: - environment_vars: - description: "An object containing a set of optional, user-specified environment - variable key-value pairs used for serving this model.\nNote: this is an experimental - feature and subject to change. \nExample model environment variables that refer - to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", - \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" - instance_profile_arn: - description: ARN of the instance profile that the served model will use to access - AWS resources. 
- max_provisioned_throughput: - description: The maximum tokens per second that the endpoint can scale up to. - min_provisioned_throughput: - description: The minimum tokens per second that the endpoint can scale down to. - model_name: - description: | + "environment_vars": + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`" + "instance_profile_arn": + "description": |- + ARN of the instance profile that the served model will use to access AWS resources. + "max_provisioned_throughput": + "description": |- + The maximum tokens per second that the endpoint can scale up to. + "min_provisioned_throughput": + "description": |- + The minimum tokens per second that the endpoint can scale down to. + "model_name": + "description": | The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model, in the form of __catalog_name__.__schema_name__.__model_name__. - model_version: - description: The version of the model in Databricks Model Registry or Unity Catalog - to be served. - name: - description: | + "model_version": + "description": |- + The version of the model in Databricks Model Registry or Unity Catalog to be served. + "name": + "description": | The name of a served model. It must be unique across an endpoint. If not specified, this field will default to -. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale_to_zero_enabled: - description: Whether the compute resources for the served model should scale down - to zero. - workload_size: - description: | + "scale_to_zero_enabled": + "description": |- + Whether the compute resources for the served model should scale down to zero. + "workload_size": + "description": | The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. - workload_type: - description: | + "workload_type": + "description": | The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize: - _: - description: | + "_": + "description": | The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. 
Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. - enum: - - Small - - Medium - - Large + "enum": + - |- + Small + - |- + Medium + - |- + Large github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType: - _: - description: | + "_": + "description": | The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types). - enum: - - CPU - - GPU_SMALL - - GPU_MEDIUM - - GPU_LARGE - - MULTIGPU_MEDIUM + "enum": + - |- + CPU + - |- + GPU_SMALL + - |- + GPU_MEDIUM + - |- + GPU_LARGE + - |- + MULTIGPU_MEDIUM github.com/databricks/databricks-sdk-go/service/serving.TrafficConfig: - routes: - description: The list of routes that define traffic to each served entity. + "routes": + "description": |- + The list of routes that define traffic to each served entity. diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index ef602d6ef..120a12543 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -1,3 +1,28 @@ +github.com/databricks/cli/bundle/config/resources.App: + "app_status": + "description": |- + PLACEHOLDER + "compute_status": + "description": |- + PLACEHOLDER + "config": + "description": |- + PLACEHOLDER + "permissions": + "description": |- + PLACEHOLDER + "service_principal_client_id": + "description": |- + PLACEHOLDER + "service_principal_id": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Cluster: "data_security_mode": "description": |- @@ -75,6 +100,19 @@ github.com/databricks/cli/bundle/config/resources.Volume: "volume_type": "description": |- PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "job": + "description": |- + PLACEHOLDER + "secret": + "description": |- + PLACEHOLDER + "serving_endpoint": + "description": |- + PLACEHOLDER + "sql_warehouse": + "description": |- + PLACEHOLDER github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: "availability": "description": |- diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 77927a966..38e099ece 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -40,6 +40,19 @@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema. } } + // Allows using variables in enum fields + if s.Type == jsonschema.StringType && s.Enum != nil { + return jsonschema.Schema{ + OneOf: []jsonschema.Schema{ + s, + { + Type: jsonschema.StringType, + Pattern: interpolationPattern("var"), + }, + }, + } + } + switch s.Type { case jsonschema.ArrayType, jsonschema.ObjectType: // arrays and objects can have complex variable values specified. 
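Editor's aside, not part of the diff: the hunk above wraps enum-typed string fields in a oneOf so that bundle authors can pass a variable reference instead of a literal enum value (exercised later in this diff by testdata/pass/job.yml, where pause_status is set to ${var.schedule_status}). The sketch below illustrates the resulting schema shape; the regular expression is only a stand-in for interpolationPattern("var"), and the []any element type for Enum is an assumption about the repo's libs/jsonschema package.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/databricks/cli/libs/jsonschema"
)

func main() {
	// An enum-typed string field as it looks before addInterpolationPatterns runs.
	original := jsonschema.Schema{
		Type: jsonschema.StringType,
		Enum: []any{"PAUSED", "UNPAUSED"}, // element type assumed to accept plain strings
	}

	// After the change, the field becomes a oneOf: either the original enum
	// or a string matching the ${var.*} interpolation pattern.
	wrapped := jsonschema.Schema{
		OneOf: []jsonschema.Schema{
			original,
			{
				Type:    jsonschema.StringType,
				Pattern: `\$\{var(\.[a-zA-Z_][a-zA-Z0-9_]*)+\}`, // illustrative stand-in for interpolationPattern("var")
			},
		},
	}

	out, _ := json.MarshalIndent(wrapped, "", "  ")
	fmt.Println(string(out))
}

Using oneOf here keeps the original enum validation intact for literal values while also accepting variable references, which is why the hunk returns early instead of mutating the enum list itself.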
@@ -159,6 +172,15 @@ func generateSchema(workdir, outputFile string) { a.addAnnotations, addInterpolationPatterns, }) + + // AdditionalProperties is set to an empty schema to allow non-typed keys used as yaml-anchors + // Example: + // some_anchor: &some_anchor + // file_path: /some/path/ + // workspace: + // <<: *some_anchor + s.AdditionalProperties = jsonschema.Schema{} + if err != nil { log.Fatal(err) } diff --git a/bundle/internal/schema/main_test.go b/bundle/internal/schema/main_test.go index 607347b6b..06e89c856 100644 --- a/bundle/internal/schema/main_test.go +++ b/bundle/internal/schema/main_test.go @@ -2,7 +2,6 @@ package main import ( "bytes" - "fmt" "io" "os" "path" @@ -42,7 +41,8 @@ func copyFile(src, dst string) error { // Checks whether descriptions are added for new config fields in the annotations.yml file // If this test fails either manually add descriptions to the `annotations.yml` or do the following: -// 1. run `make schema` from the repository root to add placeholder descriptions +// 1. for fields described outside of CLI package fetch latest schema from the OpenAPI spec and add path to file to DATABRICKS_OPENAPI_SPEC env variable +// 2. run `make schema` from the repository root to add placeholder descriptions // 2. replace all "PLACEHOLDER" values with the actual descriptions if possible // 3. run `make schema` again to regenerate the schema with acutal descriptions func TestRequiredAnnotationsForNewFields(t *testing.T) { @@ -79,7 +79,7 @@ func TestRequiredAnnotationsForNewFields(t *testing.T) { }, }) assert.NoError(t, err) - assert.Empty(t, updatedFieldPaths, fmt.Sprintf("Missing JSON-schema descriptions for new config fields in bundle/internal/schema/annotations.yml:\n%s", strings.Join(updatedFieldPaths, "\n"))) + assert.Empty(t, updatedFieldPaths, "Missing JSON-schema descriptions for new config fields in bundle/internal/schema/annotations.yml:\n%s", strings.Join(updatedFieldPaths, "\n")) } // Checks whether types in annotation files are still present in Config type diff --git a/bundle/internal/schema/parser.go b/bundle/internal/schema/parser.go index 3fbec0528..919908429 100644 --- a/bundle/internal/schema/parser.go +++ b/bundle/internal/schema/parser.go @@ -1,7 +1,6 @@ package main import ( - "bytes" "encoding/json" "fmt" "os" @@ -9,7 +8,6 @@ import ( "reflect" "strings" - "github.com/databricks/cli/libs/dyn/yamlloader" "github.com/databricks/cli/libs/jsonschema" "gopkg.in/yaml.v3" ) @@ -56,7 +54,7 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { // Check for embedded Databricks Go SDK types. if typ.Kind() == reflect.Struct { - for i := 0; i < typ.NumField(); i++ { + for i := range typ.NumField() { if !typ.Field(i).Anonymous { continue } @@ -83,7 +81,11 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { // Skip if the type is not in the openapi spec. _, ok := p.ref[k] if !ok { - continue + k = mapIncorrectTypNames(k) + _, ok = p.ref[k] + if !ok { + continue + } } // Return the first Go SDK type found in the openapi spec. @@ -93,6 +95,23 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) { return jsonschema.Schema{}, false } +// Fix inconsistent type names between the Go SDK and the OpenAPI spec. +// E.g. "serving.PaLmConfig" in the Go SDK is "serving.PaLMConfig" in the OpenAPI spec. 
+func mapIncorrectTypNames(ref string) string { + switch ref { + case "serving.PaLmConfig": + return "serving.PaLMConfig" + case "serving.OpenAiConfig": + return "serving.OpenAIConfig" + case "serving.GoogleCloudVertexAiConfig": + return "serving.GoogleCloudVertexAIConfig" + case "serving.Ai21LabsConfig": + return "serving.AI21LabsConfig" + default: + return ref + } +} + // Use the OpenAPI spec to load descriptions for the given type. func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error { annotations := annotationFile{} @@ -142,31 +161,40 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid return err } - b, err = yaml.Marshal(overrides) + err = saveYamlWithStyle(overridesPath, overrides) if err != nil { return err } - o, err := yamlloader.LoadYAML("", bytes.NewBuffer(b)) + err = saveYamlWithStyle(outputPath, annotations) if err != nil { return err } - err = saveYamlWithStyle(overridesPath, o) + err = prependCommentToFile(outputPath, "# This file is auto-generated. DO NOT EDIT.\n") if err != nil { return err } - b, err = yaml.Marshal(annotations) - if err != nil { - return err - } - b = bytes.Join([][]byte{[]byte("# This file is auto-generated. DO NOT EDIT."), b}, []byte("\n")) - err = os.WriteFile(outputPath, b, 0o644) - if err != nil { - return err - } - return nil } +func prependCommentToFile(outputPath, comment string) error { + b, err := os.ReadFile(outputPath) + if err != nil { + return err + } + f, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString(comment) + if err != nil { + return err + } + _, err = f.Write(b) + return err +} + func addEmptyOverride(key, pkg string, overridesFile annotationFile) { if overridesFile[pkg] == nil { overridesFile[pkg] = map[string]annotation{} diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml deleted file mode 100644 index e8a8866bc..000000000 --- a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml +++ /dev/null @@ -1 +0,0 @@ -unknown: value diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml index e13a52c03..ec447ba39 100644 --- a/bundle/internal/schema/testdata/pass/job.yml +++ b/bundle/internal/schema/testdata/pass/job.yml @@ -13,6 +13,8 @@ variables: simplevar: default: true description: "simplevar description" + schedule_status: + default: "PAUSED" complexvar: default: @@ -42,6 +44,8 @@ resources: dependencies: - python=3.7 client: "myclient" + trigger: + pause_status: ${var.schedule_status} tags: foo: bar bar: baz diff --git a/bundle/internal/schema/testdata/pass/yaml_anchors.yml b/bundle/internal/schema/testdata/pass/yaml_anchors.yml new file mode 100644 index 000000000..18749891d --- /dev/null +++ b/bundle/internal/schema/testdata/pass/yaml_anchors.yml @@ -0,0 +1,11 @@ +tags: &job-tags + environment: "some_environment" + +resources: + jobs: + db1: + tags: + <<: *job-tags + db2: + tags: + <<: *job-tags diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 27c4b16cd..677b8fc10 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" diff --git a/bundle/internal/tf/schema/resource_external_location.go 
b/bundle/internal/tf/schema/resource_external_location.go index da28271bc..72411f4dc 100644 --- a/bundle/internal/tf/schema/resource_external_location.go +++ b/bundle/internal/tf/schema/resource_external_location.go @@ -13,8 +13,13 @@ type ResourceExternalLocationEncryptionDetails struct { type ResourceExternalLocation struct { AccessPoint string `json:"access_point,omitempty"` + BrowseOnly bool `json:"browse_only,omitempty"` Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name"` + Fallback bool `json:"fallback,omitempty"` ForceDestroy bool `json:"force_destroy,omitempty"` ForceUpdate bool `json:"force_update,omitempty"` Id string `json:"id,omitempty"` @@ -24,6 +29,8 @@ type ResourceExternalLocation struct { Owner string `json:"owner,omitempty"` ReadOnly bool `json:"read_only,omitempty"` SkipValidation bool `json:"skip_validation,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` Url string `json:"url"` EncryptionDetails *ResourceExternalLocationEncryptionDetails `json:"encryption_details,omitempty"` } diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 1f89dc64d..7dd3f9210 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" func NewRoot() *Root { return &Root{ diff --git a/bundle/libraries/filer.go b/bundle/libraries/filer.go index 4448ed325..e09c75e0e 100644 --- a/bundle/libraries/filer.go +++ b/bundle/libraries/filer.go @@ -24,7 +24,7 @@ func GetFilerForLibraries(ctx context.Context, b *bundle.Bundle) (filer.Filer, s switch { case IsVolumesPath(artifactPath): - return filerForVolume(ctx, b) + return filerForVolume(b) default: return filerForWorkspace(b) diff --git a/bundle/libraries/filer_test.go b/bundle/libraries/filer_test.go index 88ba152fc..c18da9726 100644 --- a/bundle/libraries/filer_test.go +++ b/bundle/libraries/filer_test.go @@ -7,10 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/filer" - sdkconfig "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -39,11 +36,6 @@ func TestGetFilerForLibrariesValidUcVolume(t *testing.T) { }, } - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, "/Volumes/main/my_schema/my_volume").Return(nil) - b.SetWorkpaceClient(m.WorkspaceClient) - client, uploadPath, diags := GetFilerForLibraries(context.Background(), b) require.NoError(t, diags.Error()) assert.Equal(t, "/Volumes/main/my_schema/my_volume/.internal", uploadPath) diff --git a/bundle/libraries/filer_volume.go b/bundle/libraries/filer_volume.go index aecf68db1..176f475c6 100644 --- a/bundle/libraries/filer_volume.go +++ b/bundle/libraries/filer_volume.go @@ -1,132 +1,16 @@ package libraries import ( - "context" - "errors" - "fmt" "path" - "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" - 
"github.com/databricks/cli/libs/dyn" - "github.com/databricks/cli/libs/dyn/dynvar" "github.com/databricks/cli/libs/filer" - "github.com/databricks/databricks-sdk-go/apierr" ) -func extractVolumeFromPath(artifactPath string) (string, string, string, error) { - if !IsVolumesPath(artifactPath) { - return "", "", "", fmt.Errorf("expected artifact_path to start with /Volumes/, got %s", artifactPath) - } - - parts := strings.Split(artifactPath, "/") - volumeFormatErr := fmt.Errorf("expected UC volume path to be in the format /Volumes////..., got %s", artifactPath) - - // Incorrect format. - if len(parts) < 5 { - return "", "", "", volumeFormatErr - } - - catalogName := parts[2] - schemaName := parts[3] - volumeName := parts[4] - - // Incorrect format. - if catalogName == "" || schemaName == "" || volumeName == "" { - return "", "", "", volumeFormatErr - } - - return catalogName, schemaName, volumeName, nil -} - -// This function returns a filer for ".internal" folder inside the directory configured -// at `workspace.artifact_path`. -// This function also checks if the UC volume exists in the workspace and then: -// 1. If the UC volume exists in the workspace: -// Returns a filer for the UC volume. -// 2. If the UC volume does not exist in the workspace but is (with high confidence) defined in -// the bundle configuration: -// Returns an error and a warning that instructs the user to deploy the -// UC volume before using it in the artifact path. -// 3. If the UC volume does not exist in the workspace and is not defined in the bundle configuration: -// Returns an error. -func filerForVolume(ctx context.Context, b *bundle.Bundle) (filer.Filer, string, diag.Diagnostics) { - artifactPath := b.Config.Workspace.ArtifactPath +func filerForVolume(b *bundle.Bundle) (filer.Filer, string, diag.Diagnostics) { w := b.WorkspaceClient() - - catalogName, schemaName, volumeName, err := extractVolumeFromPath(artifactPath) - if err != nil { - return nil, "", diag.Diagnostics{ - { - Severity: diag.Error, - Summary: err.Error(), - Locations: b.Config.GetLocations("workspace.artifact_path"), - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - }, - } - } - - // Check if the UC volume exists in the workspace. - volumePath := fmt.Sprintf("/Volumes/%s/%s/%s", catalogName, schemaName, volumeName) - err = w.Files.GetDirectoryMetadataByDirectoryPath(ctx, volumePath) - - // If the volume exists already, directly return the filer for the path to - // upload the artifacts to. - if err == nil { - uploadPath := path.Join(artifactPath, InternalDirName) - f, err := filer.NewFilesClient(w, uploadPath) - return f, uploadPath, diag.FromErr(err) - } - - baseErr := diag.Diagnostic{ - Severity: diag.Error, - Summary: fmt.Sprintf("unable to determine if volume at %s exists: %s", volumePath, err), - Locations: b.Config.GetLocations("workspace.artifact_path"), - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - } - - if errors.Is(err, apierr.ErrNotFound) { - // Since the API returned a 404, the volume does not exist. - // Modify the error message to provide more context. - baseErr.Summary = fmt.Sprintf("volume %s does not exist: %s", volumePath, err) - - // If the volume is defined in the bundle, provide a more helpful error diagnostic, - // with more details and location information. 
- path, locations, ok := findVolumeInBundle(b, catalogName, schemaName, volumeName) - if !ok { - return nil, "", diag.Diagnostics{baseErr} - } - baseErr.Detail = `You are using a volume in your artifact_path that is managed by -this bundle but which has not been deployed yet. Please first deploy -the volume using 'bundle deploy' and then switch over to using it in -the artifact_path.` - baseErr.Paths = append(baseErr.Paths, path) - baseErr.Locations = append(baseErr.Locations, locations...) - } - - return nil, "", diag.Diagnostics{baseErr} -} - -func findVolumeInBundle(b *bundle.Bundle, catalogName, schemaName, volumeName string) (dyn.Path, []dyn.Location, bool) { - volumes := b.Config.Resources.Volumes - for k, v := range volumes { - if v.CatalogName != catalogName || v.Name != volumeName { - continue - } - // UC schemas can be defined in the bundle itself, and thus might be interpolated - // at runtime via the ${resources.schemas.} syntax. Thus we match the volume - // definition if the schema name is the same as the one in the bundle, or if the - // schema name is interpolated. - // We only have to check for ${resources.schemas...} references because any - // other valid reference (like ${var.foo}) would have been interpolated by this point. - p, ok := dynvar.PureReferenceToPath(v.SchemaName) - isSchemaDefinedInBundle := ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")}) - if v.SchemaName != schemaName && !isSchemaDefinedInBundle { - continue - } - pathString := fmt.Sprintf("resources.volumes.%s", k) - return dyn.MustPathFromString(pathString), b.Config.GetLocations(pathString), true - } - return nil, nil, false + uploadPath := path.Join(b.Config.Workspace.ArtifactPath, InternalDirName) + f, err := filer.NewFilesClient(w, uploadPath) + return f, uploadPath, diag.FromErr(err) } diff --git a/bundle/libraries/filer_volume_test.go b/bundle/libraries/filer_volume_test.go index 7b2f5c5ba..39bdc4135 100644 --- a/bundle/libraries/filer_volume_test.go +++ b/bundle/libraries/filer_volume_test.go @@ -1,277 +1,27 @@ package libraries import ( - "context" - "fmt" "path" "testing" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/internal/bundletest" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/filer" - "github.com/databricks/databricks-sdk-go/apierr" - sdkconfig "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func TestFindVolumeInBundle(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Resources: config.Resources{ - Volumes: map[string]*resources.Volume{ - "foo": { - CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ - CatalogName: "main", - Name: "my_volume", - SchemaName: "my_schema", - }, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "resources.volumes.foo", []dyn.Location{ - { - File: "volume.yml", - Line: 1, - Column: 2, - }, - }) - - // volume is in DAB. 
- path, locations, ok := findVolumeInBundle(b, "main", "my_schema", "my_volume") - assert.True(t, ok) - assert.Equal(t, []dyn.Location{{ - File: "volume.yml", - Line: 1, - Column: 2, - }}, locations) - assert.Equal(t, dyn.MustPathFromString("resources.volumes.foo"), path) - - // wrong volume name - _, _, ok = findVolumeInBundle(b, "main", "my_schema", "doesnotexist") - assert.False(t, ok) - - // wrong schema name - _, _, ok = findVolumeInBundle(b, "main", "doesnotexist", "my_volume") - assert.False(t, ok) - - // wrong catalog name - _, _, ok = findVolumeInBundle(b, "doesnotexist", "my_schema", "my_volume") - assert.False(t, ok) - - // schema name is interpolated but does not have the right prefix. In this case - // we should not match the volume. - b.Config.Resources.Volumes["foo"].SchemaName = "${foo.bar.baz}" - _, _, ok = findVolumeInBundle(b, "main", "my_schema", "my_volume") - assert.False(t, ok) - - // schema name is interpolated. - b.Config.Resources.Volumes["foo"].SchemaName = "${resources.schemas.my_schema.name}" - path, locations, ok = findVolumeInBundle(b, "main", "valuedoesnotmatter", "my_volume") - assert.True(t, ok) - assert.Equal(t, []dyn.Location{{ - File: "volume.yml", - Line: 1, - Column: 2, - }}, locations) - assert.Equal(t, dyn.MustPathFromString("resources.volumes.foo"), path) -} - -func TestFilerForVolumeForErrorFromAPI(t *testing.T) { +func TestFilerForVolume(t *testing.T) { b := &bundle.Bundle{ Config: config.Root{ Workspace: config.Workspace{ - ArtifactPath: "/Volumes/main/my_schema/my_volume", + ArtifactPath: "/Volumes/main/my_schema/my_volume/abc", }, }, } - bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}) - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, "/Volumes/main/my_schema/my_volume").Return(fmt.Errorf("error from API")) - b.SetWorkpaceClient(m.WorkspaceClient) - - _, _, diags := filerForVolume(context.Background(), b) - assert.Equal(t, diag.Diagnostics{ - { - Severity: diag.Error, - Summary: "unable to determine if volume at /Volumes/main/my_schema/my_volume exists: error from API", - Locations: []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}, - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - }, - }, diags) -} - -func TestFilerForVolumeWithVolumeNotFound(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - ArtifactPath: "/Volumes/main/my_schema/doesnotexist", - }, - }, - } - - bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}) - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, "/Volumes/main/my_schema/doesnotexist").Return(apierr.NotFound("some error message")) - b.SetWorkpaceClient(m.WorkspaceClient) - - _, _, diags := filerForVolume(context.Background(), b) - assert.Equal(t, diag.Diagnostics{ - { - Severity: diag.Error, - Summary: "volume /Volumes/main/my_schema/doesnotexist does not exist: some error message", - Locations: []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}, - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - }, - }, diags) -} - -func TestFilerForVolumeNotFoundAndInBundle(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - ArtifactPath: 
"/Volumes/main/my_schema/my_volume", - }, - Resources: config.Resources{ - Volumes: map[string]*resources.Volume{ - "foo": { - CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ - CatalogName: "main", - Name: "my_volume", - VolumeType: "MANAGED", - SchemaName: "my_schema", - }, - }, - }, - }, - }, - } - - bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}) - bundletest.SetLocation(b, "resources.volumes.foo", []dyn.Location{{File: "volume.yml", Line: 1, Column: 2}}) - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, "/Volumes/main/my_schema/my_volume").Return(apierr.NotFound("error from API")) - b.SetWorkpaceClient(m.WorkspaceClient) - - _, _, diags := GetFilerForLibraries(context.Background(), b) - assert.Equal(t, diag.Diagnostics{ - { - Severity: diag.Error, - Summary: "volume /Volumes/main/my_schema/my_volume does not exist: error from API", - Locations: []dyn.Location{{File: "config.yml", Line: 1, Column: 2}, {File: "volume.yml", Line: 1, Column: 2}}, - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path"), dyn.MustPathFromString("resources.volumes.foo")}, - Detail: `You are using a volume in your artifact_path that is managed by -this bundle but which has not been deployed yet. Please first deploy -the volume using 'bundle deploy' and then switch over to using it in -the artifact_path.`, - }, - }, diags) -} - -func invalidVolumePaths() []string { - return []string{ - "/Volumes/", - "/Volumes/main", - "/Volumes/main/", - "/Volumes/main//", - "/Volumes/main//my_schema", - "/Volumes/main/my_schema", - "/Volumes/main/my_schema/", - "/Volumes/main/my_schema//", - "/Volumes//my_schema/my_volume", - } -} - -func TestFilerForVolumeWithInvalidVolumePaths(t *testing.T) { - for _, p := range invalidVolumePaths() { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - ArtifactPath: p, - }, - }, - } - - bundletest.SetLocation(b, "workspace.artifact_path", []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}) - - _, _, diags := GetFilerForLibraries(context.Background(), b) - require.Equal(t, diags, diag.Diagnostics{{ - Severity: diag.Error, - Summary: fmt.Sprintf("expected UC volume path to be in the format /Volumes////..., got %s", p), - Locations: []dyn.Location{{File: "config.yml", Line: 1, Column: 2}}, - Paths: []dyn.Path{dyn.MustPathFromString("workspace.artifact_path")}, - }}) - } -} - -func TestFilerForVolumeWithInvalidPrefix(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - ArtifactPath: "/Volume/main/my_schema/my_volume", - }, - }, - } - - _, _, diags := filerForVolume(context.Background(), b) - require.EqualError(t, diags.Error(), "expected artifact_path to start with /Volumes/, got /Volume/main/my_schema/my_volume") -} - -func TestFilerForVolumeWithValidVolumePaths(t *testing.T) { - validPaths := []string{ - "/Volumes/main/my_schema/my_volume", - "/Volumes/main/my_schema/my_volume/", - "/Volumes/main/my_schema/my_volume/a/b/c", - "/Volumes/main/my_schema/my_volume/a/a/a", - } - - for _, p := range validPaths { - b := &bundle.Bundle{ - Config: config.Root{ - Workspace: config.Workspace{ - ArtifactPath: p, - }, - }, - } - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, 
"/Volumes/main/my_schema/my_volume").Return(nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - client, uploadPath, diags := filerForVolume(context.Background(), b) - require.NoError(t, diags.Error()) - assert.Equal(t, path.Join(p, ".internal"), uploadPath) - assert.IsType(t, &filer.FilesClient{}, client) - } -} - -func TestExtractVolumeFromPath(t *testing.T) { - catalogName, schemaName, volumeName, err := extractVolumeFromPath("/Volumes/main/my_schema/my_volume") - require.NoError(t, err) - assert.Equal(t, "main", catalogName) - assert.Equal(t, "my_schema", schemaName) - assert.Equal(t, "my_volume", volumeName) - - for _, p := range invalidVolumePaths() { - _, _, _, err := extractVolumeFromPath(p) - assert.EqualError(t, err, fmt.Sprintf("expected UC volume path to be in the format /Volumes////..., got %s", p)) - } + client, uploadPath, diags := filerForVolume(b) + require.NoError(t, diags.Error()) + assert.Equal(t, path.Join("/Volumes/main/my_schema/my_volume/abc/.internal"), uploadPath) + assert.IsType(t, &filer.FilesClient{}, client) } diff --git a/bundle/libraries/helpers.go b/bundle/libraries/helpers.go index 2149e5885..5a1a9511c 100644 --- a/bundle/libraries/helpers.go +++ b/bundle/libraries/helpers.go @@ -1,7 +1,7 @@ package libraries import ( - "fmt" + "errors" "github.com/databricks/databricks-sdk-go/service/compute" ) @@ -20,5 +20,5 @@ func libraryPath(library *compute.Library) (string, error) { return library.Requirements, nil } - return "", fmt.Errorf("not supported library type") + return "", errors.New("not supported library type") } diff --git a/bundle/libraries/helpers_test.go b/bundle/libraries/helpers_test.go index 9d7e12ee5..754aa8f95 100644 --- a/bundle/libraries/helpers_test.go +++ b/bundle/libraries/helpers_test.go @@ -12,25 +12,25 @@ func TestLibraryPath(t *testing.T) { p, err := libraryPath(&compute.Library{Whl: path}) assert.Equal(t, path, p) - assert.Nil(t, err) + assert.NoError(t, err) p, err = libraryPath(&compute.Library{Jar: path}) assert.Equal(t, path, p) - assert.Nil(t, err) + assert.NoError(t, err) p, err = libraryPath(&compute.Library{Egg: path}) assert.Equal(t, path, p) - assert.Nil(t, err) + assert.NoError(t, err) p, err = libraryPath(&compute.Library{Requirements: path}) assert.Equal(t, path, p) - assert.Nil(t, err) + assert.NoError(t, err) p, err = libraryPath(&compute.Library{}) assert.Equal(t, "", p) - assert.NotNil(t, err) + assert.Error(t, err) p, err = libraryPath(&compute.Library{Pypi: &compute.PythonPyPiLibrary{Package: "pypipackage"}}) assert.Equal(t, "", p) - assert.NotNil(t, err) + assert.Error(t, err) } diff --git a/bundle/libraries/upload_test.go b/bundle/libraries/upload_test.go index 493785bf5..44b194c56 100644 --- a/bundle/libraries/upload_test.go +++ b/bundle/libraries/upload_test.go @@ -11,8 +11,6 @@ import ( mockfiler "github.com/databricks/cli/internal/mocks/libs/filer" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/filer" - sdkconfig "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/mock" @@ -183,11 +181,6 @@ func TestArtifactUploadForVolumes(t *testing.T) { filer.CreateParentDirectories, ).Return(nil) - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &sdkconfig.Config{} - m.GetMockFilesAPI().EXPECT().GetDirectoryMetadataByDirectoryPath(mock.Anything, 
"/Volumes/foo/bar/artifacts").Return(nil) - b.SetWorkpaceClient(m.WorkspaceClient) - diags := bundle.Apply(context.Background(), b, bundle.Seq(ExpandGlobReferences(), UploadWithClient(mockFiler))) require.NoError(t, diags.Error()) diff --git a/bundle/permissions/filter_test.go b/bundle/permissions/filter_test.go index e6e5a3799..ef7167d75 100644 --- a/bundle/permissions/filter_test.go +++ b/bundle/permissions/filter_test.go @@ -99,32 +99,32 @@ func TestFilterCurrentUser(t *testing.T) { assert.NoError(t, diags.Error()) // Assert current user is filtered out. - assert.Equal(t, 2, len(b.Config.Resources.Jobs["job1"].Permissions)) + assert.Len(t, b.Config.Resources.Jobs["job1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Jobs["job1"].Permissions, robot) assert.Contains(t, b.Config.Resources.Jobs["job1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Jobs["job2"].Permissions)) + assert.Len(t, b.Config.Resources.Jobs["job2"].Permissions, 2) assert.Contains(t, b.Config.Resources.Jobs["job2"].Permissions, robot) assert.Contains(t, b.Config.Resources.Jobs["job2"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Pipelines["pipeline1"].Permissions)) + assert.Len(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, robot) assert.Contains(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Experiments["experiment1"].Permissions)) + assert.Len(t, b.Config.Resources.Experiments["experiment1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Experiments["experiment1"].Permissions, robot) assert.Contains(t, b.Config.Resources.Experiments["experiment1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Models["model1"].Permissions)) + assert.Len(t, b.Config.Resources.Models["model1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Models["model1"].Permissions, robot) assert.Contains(t, b.Config.Resources.Models["model1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions)) + assert.Len(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, 2) assert.Contains(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, robot) assert.Contains(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, bob) // Assert there's no change to the grant. - assert.Equal(t, 1, len(b.Config.Resources.RegisteredModels["registered_model1"].Grants)) + assert.Len(t, b.Config.Resources.RegisteredModels["registered_model1"].Grants, 1) } func TestFilterCurrentServicePrincipal(t *testing.T) { @@ -134,32 +134,32 @@ func TestFilterCurrentServicePrincipal(t *testing.T) { assert.NoError(t, diags.Error()) // Assert current user is filtered out. 
- assert.Equal(t, 2, len(b.Config.Resources.Jobs["job1"].Permissions)) + assert.Len(t, b.Config.Resources.Jobs["job1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Jobs["job1"].Permissions, alice) assert.Contains(t, b.Config.Resources.Jobs["job1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Jobs["job2"].Permissions)) + assert.Len(t, b.Config.Resources.Jobs["job2"].Permissions, 2) assert.Contains(t, b.Config.Resources.Jobs["job2"].Permissions, alice) assert.Contains(t, b.Config.Resources.Jobs["job2"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Pipelines["pipeline1"].Permissions)) + assert.Len(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, alice) assert.Contains(t, b.Config.Resources.Pipelines["pipeline1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Experiments["experiment1"].Permissions)) + assert.Len(t, b.Config.Resources.Experiments["experiment1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Experiments["experiment1"].Permissions, alice) assert.Contains(t, b.Config.Resources.Experiments["experiment1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.Models["model1"].Permissions)) + assert.Len(t, b.Config.Resources.Models["model1"].Permissions, 2) assert.Contains(t, b.Config.Resources.Models["model1"].Permissions, alice) assert.Contains(t, b.Config.Resources.Models["model1"].Permissions, bob) - assert.Equal(t, 2, len(b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions)) + assert.Len(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, 2) assert.Contains(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, alice) assert.Contains(t, b.Config.Resources.ModelServingEndpoints["endpoint1"].Permissions, bob) // Assert there's no change to the grant. 
- assert.Equal(t, 1, len(b.Config.Resources.RegisteredModels["registered_model1"].Grants)) + assert.Len(t, b.Config.Resources.RegisteredModels["registered_model1"].Grants, 1) } func TestFilterCurrentUserDoesNotErrorWhenNoResources(t *testing.T) { diff --git a/bundle/permissions/mutator.go b/bundle/permissions/mutator.go index cd7cbf40c..8a0057dee 100644 --- a/bundle/permissions/mutator.go +++ b/bundle/permissions/mutator.go @@ -51,6 +51,10 @@ var ( CAN_MANAGE: "CAN_MANAGE", CAN_VIEW: "CAN_READ", }, + "apps": { + CAN_MANAGE: "CAN_MANAGE", + CAN_VIEW: "CAN_USE", + }, } ) diff --git a/bundle/permissions/mutator_test.go b/bundle/permissions/mutator_test.go index 78703e90f..1f7897cae 100644 --- a/bundle/permissions/mutator_test.go +++ b/bundle/permissions/mutator_test.go @@ -58,6 +58,10 @@ func TestApplyBundlePermissions(t *testing.T) { "dashboard_1": {}, "dashboard_2": {}, }, + Apps: map[string]*resources.App{ + "app_1": {}, + "app_2": {}, + }, }, }, } @@ -114,6 +118,10 @@ func TestApplyBundlePermissions(t *testing.T) { require.Len(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, 2) require.Contains(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, resources.Permission{Level: "CAN_MANAGE", UserName: "TestUser"}) require.Contains(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, resources.Permission{Level: "CAN_READ", GroupName: "TestGroup"}) + + require.Len(t, b.Config.Resources.Apps["app_1"].Permissions, 2) + require.Contains(t, b.Config.Resources.Apps["app_1"].Permissions, resources.Permission{Level: "CAN_MANAGE", UserName: "TestUser"}) + require.Contains(t, b.Config.Resources.Apps["app_1"].Permissions, resources.Permission{Level: "CAN_USE", GroupName: "TestGroup"}) } func TestWarningOnOverlapPermission(t *testing.T) { @@ -164,7 +172,7 @@ func TestAllResourcesExplicitlyDefinedForPermissionsSupport(t *testing.T) { for _, resource := range unsupportedResources { _, ok := levelsMap[resource] - assert.False(t, ok, fmt.Sprintf("Resource %s is defined in both levelsMap and unsupportedResources", resource)) + assert.False(t, ok, "Resource %s is defined in both levelsMap and unsupportedResources", resource) } for _, resource := range r.AllResources() { diff --git a/bundle/permissions/permission_diagnostics_test.go b/bundle/permissions/permission_diagnostics_test.go index 7b0afefa0..6c55ab594 100644 --- a/bundle/permissions/permission_diagnostics_test.go +++ b/bundle/permissions/permission_diagnostics_test.go @@ -28,7 +28,7 @@ func TestPermissionDiagnosticsApplyFail(t *testing.T) { }) diags := permissions.PermissionDiagnostics().Apply(context.Background(), b) - require.Equal(t, diags[0].Severity, diag.Warning) + require.Equal(t, diag.Warning, diags[0].Severity) require.Contains(t, diags[0].Summary, "permissions section should include testuser@databricks.com or one of their groups with CAN_MANAGE permissions") } diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 4ac0d38a5..828b12f50 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -3,6 +3,7 @@ package permissions import ( "context" "fmt" + "strconv" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/libraries" @@ -78,7 +79,7 @@ func setPermissions(ctx context.Context, w workspace.WorkspaceInterface, path st } _, err = w.SetPermissions(ctx, workspace.WorkspaceObjectPermissionsRequest{ - WorkspaceObjectId: fmt.Sprint(obj.ObjectId), + WorkspaceObjectId: strconv.FormatInt(obj.ObjectId, 10), WorkspaceObjectType: 
"directories", AccessControlList: permissions, }) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 2dc9623bd..b59ce9f89 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -2,9 +2,10 @@ package phases import ( "context" - "fmt" + "errors" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/apps" "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" @@ -54,7 +55,7 @@ func filterDeleteOrRecreateActions(changes []*tfjson.ResourceChange, resourceTyp func approvalForDeploy(ctx context.Context, b *bundle.Bundle) (bool, error) { tf := b.Terraform if tf == nil { - return false, fmt.Errorf("terraform not initialized") + return false, errors.New("terraform not initialized") } // read plan file @@ -111,7 +112,7 @@ is removed from the catalog, but the underlying files are not deleted:` } if !cmdio.IsPromptSupported(ctx) { - return false, fmt.Errorf("the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") + return false, errors.New("the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") } cmdio.LogString(ctx, "") @@ -129,12 +130,15 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( bundle.Seq( + apps.SlowDeployMessage(), bundle.LogString("Deploying resources..."), terraform.Apply(), ), bundle.Seq( terraform.StatePush(), terraform.Load(), + apps.InterpolateVariables(), + apps.UploadConfig(), metadata.Compute(), metadata.Upload(), bundle.LogString("Deployment complete!"), diff --git a/bundle/phases/destroy.go b/bundle/phases/destroy.go index 6eb8b6a01..05a41dea2 100644 --- a/bundle/phases/destroy.go +++ b/bundle/phases/destroy.go @@ -3,7 +3,6 @@ package phases import ( "context" "errors" - "fmt" "net/http" "github.com/databricks/cli/bundle" @@ -34,7 +33,7 @@ func assertRootPathExists(ctx context.Context, b *bundle.Bundle) (bool, error) { func approvalForDestroy(ctx context.Context, b *bundle.Bundle) (bool, error) { tf := b.Terraform if tf == nil { - return false, fmt.Errorf("terraform not initialized") + return false, errors.New("terraform not initialized") } // read plan file @@ -63,7 +62,7 @@ func approvalForDestroy(ctx context.Context, b *bundle.Bundle) (bool, error) { } - cmdio.LogString(ctx, fmt.Sprintf("All files and directories at the following location will be deleted: %s", b.Config.Workspace.RootPath)) + cmdio.LogString(ctx, "All files and directories at the following location will be deleted: "+b.Config.Workspace.RootPath) cmdio.LogString(ctx, "") if b.AutoApprove { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 6fa0e5fed..c5b875196 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -2,6 +2,7 @@ package phases import ( "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/apps" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" pythonmutator "github.com/databricks/cli/bundle/config/mutator/python" @@ -33,14 +34,14 @@ func Initialize() bundle.Mutator { // If it is an ancestor, this updates all paths to be relative to the sync root path. 
mutator.SyncInferRoot(), - mutator.MergeJobClusters(), - mutator.MergeJobParameters(), - mutator.MergeJobTasks(), - mutator.MergePipelineClusters(), mutator.InitializeWorkspaceClient(), mutator.PopulateCurrentUser(), mutator.LoadGitDetails(), + // This mutator needs to be run before variable interpolation and defining default workspace paths + // because it affects how workspace variables are resolved. + mutator.ApplySourceLinkedDeploymentPreset(), + mutator.DefineDefaultWorkspaceRoot(), mutator.ExpandWorkspaceRoot(), mutator.DefineDefaultWorkspacePaths(), @@ -51,18 +52,29 @@ func Initialize() bundle.Mutator { mutator.RewriteWorkspacePrefix(), mutator.SetVariables(), + // Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences, // ResolveVariableReferencesInComplexVariables and ResolveVariableReferences. // See what is expected in PythonMutatorPhaseInit doc pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit), + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoadResources), + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseApplyMutators), mutator.ResolveVariableReferencesInLookup(), mutator.ResolveResourceReferences(), - mutator.ResolveVariableReferencesInComplexVariables(), mutator.ResolveVariableReferences( "bundle", "workspace", "variables", ), + + mutator.MergeJobClusters(), + mutator.MergeJobParameters(), + mutator.MergeJobTasks(), + mutator.MergePipelineClusters(), + mutator.MergeApps(), + + mutator.CaptureSchemaDependency(), + // Provide permission config errors & warnings after initializing all variables permissions.PermissionDiagnostics(), mutator.SetRunAs(), @@ -80,6 +92,8 @@ func Initialize() bundle.Mutator { mutator.TranslatePaths(), trampoline.WrapperWarning(), + apps.Validate(), + permissions.ValidateSharedRootPermissions(), permissions.ApplyBundlePermissions(), permissions.FilterCurrentUser(), diff --git a/bundle/root.go b/bundle/root.go index efc21e0ca..9ea9a8c13 100644 --- a/bundle/root.go +++ b/bundle/root.go @@ -2,6 +2,7 @@ package bundle import ( "context" + "errors" "fmt" "os" @@ -21,7 +22,7 @@ func getRootEnv(ctx context.Context) (string, error) { } stat, err := os.Stat(path) if err == nil && !stat.IsDir() { - err = fmt.Errorf("not a directory") + err = errors.New("not a directory") } if err != nil { return "", fmt.Errorf(`invalid bundle root %s="%s": %w`, env.RootVariable, path, err) diff --git a/bundle/run/app.go b/bundle/run/app.go new file mode 100644 index 000000000..b15f3f4b6 --- /dev/null +++ b/bundle/run/app.go @@ -0,0 +1,244 @@ +package run + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/run/output" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func logProgress(ctx context.Context, msg string) { + if msg == "" { + return + } + cmdio.LogString(ctx, "✓ "+msg) +} + +type appRunner struct { + key + + bundle *bundle.Bundle + app *resources.App +} + +func (a *appRunner) Name() string { + if a.app == nil { + return "" + } + + return a.app.Name +} + +func isAppStopped(app *apps.App) bool { + return app.ComputeStatus == nil || + (app.ComputeStatus.State == apps.ComputeStateStopped || app.ComputeStatus.State == apps.ComputeStateError) +} + +func (a *appRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) { + app := a.app 
+ b := a.bundle + if app == nil { + return nil, errors.New("app is not defined") + } + + logProgress(ctx, "Getting the status of the app "+app.Name) + w := b.WorkspaceClient() + + // Check the status of the app first. + createdApp, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name}) + if err != nil { + return nil, err + } + + if createdApp.AppStatus != nil { + logProgress(ctx, fmt.Sprintf("App is in %s state", createdApp.AppStatus.State)) + } + + if createdApp.ComputeStatus != nil { + logProgress(ctx, fmt.Sprintf("App compute is in %s state", createdApp.ComputeStatus.State)) + } + + // There could be 2 reasons why the app is not running: + // 1. The app is new and was never deployed yet. + // 2. The app was stopped (compute not running). + // We need to start the app only if the compute is not running. + if isAppStopped(createdApp) { + err := a.start(ctx) + if err != nil { + return nil, err + } + } + + // Deploy the app. + err = a.deploy(ctx) + if err != nil { + return nil, err + } + + cmdio.LogString(ctx, "You can access the app at "+createdApp.Url) + return nil, nil +} + +func (a *appRunner) start(ctx context.Context) error { + app := a.app + b := a.bundle + w := b.WorkspaceClient() + + logProgress(ctx, "Starting the app "+app.Name) + wait, err := w.Apps.Start(ctx, apps.StartAppRequest{Name: app.Name}) + if err != nil { + return err + } + + startedApp, err := wait.OnProgress(func(p *apps.App) { + if p.AppStatus == nil { + return + } + logProgress(ctx, "App is starting...") + }).Get() + if err != nil { + return err + } + + // After the app is started (meaning the compute is running), the API will return the app object with the + // active and pending deployments fields (if any). If there are active or pending deployments, + // we need to wait for them to complete before we can do the new deployment. + // Otherwise, the new deployment will fail. + err = waitForDeploymentToComplete(ctx, w, startedApp) + if err != nil { + return err + } + + logProgress(ctx, "App is started!") + return nil +} + +func waitForDeploymentToComplete(ctx context.Context, w *databricks.WorkspaceClient, app *apps.App) error { + // We first wait for the active deployment to complete. + if app.ActiveDeployment != nil && + app.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { + logProgress(ctx, "Waiting for the active deployment to complete...") + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.ActiveDeployment.DeploymentId, 20*time.Minute, nil) + if err != nil { + return err + } + logProgress(ctx, "Active deployment is completed!") + } + + // Then, we wait for the pending deployment to complete. + if app.PendingDeployment != nil && + app.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { + logProgress(ctx, "Waiting for the pending deployment to complete...") + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.PendingDeployment.DeploymentId, 20*time.Minute, nil) + if err != nil { + return err + } + logProgress(ctx, "Pending deployment is completed!") + } + + return nil +} + +func (a *appRunner) deploy(ctx context.Context) error { + app := a.app + b := a.bundle + w := b.WorkspaceClient() + + sourceCodePath := app.SourceCodePath + wait, err := w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ + AppName: app.Name, + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: sourceCodePath, + }, + }) + // If deploy returns an error, then there's an active deployment in progress, wait for it to complete. 
+ // For this we first need to get the app and its active and pending deployments and then wait for them.
+ if err != nil {
+ app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name})
+ if err != nil {
+ return fmt.Errorf("failed to get app %s: %w", app.Name, err)
+ }
+
+ err = waitForDeploymentToComplete(ctx, w, app)
+ if err != nil {
+ return err
+ }
+
+ // Now we can try to deploy the app again
+ wait, err = w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{
+ AppName: app.Name,
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: sourceCodePath,
+ },
+ })
+ if err != nil {
+ return err
+ }
+ }
+
+ _, err = wait.OnProgress(func(ad *apps.AppDeployment) {
+ if ad.Status == nil {
+ return
+ }
+ logProgress(ctx, ad.Status.Message)
+ }).Get()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (a *appRunner) Cancel(ctx context.Context) error {
+ // We should cancel the app by stopping it.
+ app := a.app
+ b := a.bundle
+ if app == nil {
+ return errors.New("app is not defined")
+ }
+
+ w := b.WorkspaceClient()
+
+ logProgress(ctx, "Stopping app "+app.Name)
+ wait, err := w.Apps.Stop(ctx, apps.StopAppRequest{Name: app.Name})
+ if err != nil {
+ return err
+ }
+
+ _, err = wait.OnProgress(func(p *apps.App) {
+ if p.AppStatus == nil {
+ return
+ }
+ logProgress(ctx, p.AppStatus.Message)
+ }).Get()
+
+ logProgress(ctx, "App is stopped!")
+ return err
+}
+
+func (a *appRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) {
+ // We should restart the app by just running it again, meaning a new app deployment will be done.
+ return a.Run(ctx, opts)
+}
+
+func (a *appRunner) ParseArgs(args []string, opts *Options) error {
+ if len(args) == 0 {
+ return nil
+ }
+
+ return fmt.Errorf("received %d unexpected positional arguments", len(args))
+}
+
+func (a *appRunner) CompleteArgs(args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+ return nil, cobra.ShellCompDirectiveNoFileComp
+}
diff --git a/bundle/run/app_test.go b/bundle/run/app_test.go
new file mode 100644
index 000000000..77f197e8d
--- /dev/null
+++ b/bundle/run/app_test.go
@@ -0,0 +1,276 @@
+package run
+
+import (
+ "context"
+ "errors"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/databricks/cli/bundle"
+ "github.com/databricks/cli/bundle/config"
+ "github.com/databricks/cli/bundle/config/mutator"
+ "github.com/databricks/cli/bundle/config/resources"
+ "github.com/databricks/cli/bundle/internal/bundletest"
+ "github.com/databricks/cli/libs/cmdio"
+ "github.com/databricks/cli/libs/dyn"
+ "github.com/databricks/cli/libs/vfs"
+ "github.com/databricks/databricks-sdk-go/experimental/mocks"
+ "github.com/databricks/databricks-sdk-go/service/apps"
+ "github.com/stretchr/testify/mock"
+ "github.com/stretchr/testify/require"
+)
+
+type testAppRunner struct {
+ m *mocks.MockWorkspaceClient
+ b *bundle.Bundle
+ ctx context.Context
+}
+
+func (ta *testAppRunner) run(t *testing.T) {
+ r := appRunner{
+ key: "my_app",
+ bundle: ta.b,
+ app: ta.b.Config.Resources.Apps["my_app"],
+ }
+
+ _, err := r.Run(ta.ctx, &Options{})
+ require.NoError(t, err)
+}
+
+func setupBundle(t *testing.T) (context.Context, *bundle.Bundle, *mocks.MockWorkspaceClient) {
+ root := t.TempDir()
+ err := os.MkdirAll(filepath.Join(root, "my_app"), 0o700)
+ require.NoError(t, err)
+
+ b := &bundle.Bundle{
+ BundleRootPath: root,
+ SyncRoot: vfs.MustNew(root),
+ Config: config.Root{
+ Workspace: config.Workspace{
+ RootPath: "/Workspace/Users/foo@bar.com/",
+ },
+
Resources: config.Resources{ + Apps: map[string]*resources.App{ + "my_app": { + App: &apps.App{ + Name: "my_app", + }, + SourceCodePath: "./my_app", + Config: map[string]any{ + "command": []string{"echo", "hello"}, + "env": []map[string]string{ + {"name": "MY_APP", "value": "my value"}, + }, + }, + }, + }, + }, + }, + } + + mwc := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(mwc.WorkspaceClient) + bundletest.SetLocation(b, "resources.apps.my_app", []dyn.Location{{File: "./databricks.yml"}}) + + ctx := cmdio.MockDiscard(context.Background()) + + diags := bundle.Apply(ctx, b, bundle.Seq( + mutator.DefineDefaultWorkspacePaths(), + mutator.TranslatePaths(), + )) + require.Empty(t, diags) + + return ctx, b, mwc +} + +func setupTestApp(t *testing.T, initialAppState apps.ApplicationState, initialComputeState apps.ComputeState) *testAppRunner { + ctx, b, mwc := setupBundle(t) + + appApi := mwc.GetMockAppsAPI() + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: initialAppState, + }, + ComputeStatus: &apps.ComputeStatus{ + State: initialComputeState, + }, + }, nil) + + wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{ + Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) { + return nil, nil + }, + } + appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(wait, nil) + + return &testAppRunner{ + m: mwc, + b: b, + ctx: ctx, + } +} + +func TestAppRunStartedApp(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateRunning, apps.ComputeStateActive) + r.run(t) +} + +func TestAppRunStoppedApp(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateCrashed, apps.ComputeStateStopped) + + appsApi := r.m.GetMockAppsAPI() + appsApi.EXPECT().Start(mock.Anything, apps.StartAppRequest{ + Name: "my_app", + }).Return(&apps.WaitGetAppActive[apps.App]{ + Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) { + return &apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + }, nil + }, + }, nil) + + r.run(t) +} + +func TestAppRunWithAnActiveDeploymentInProgress(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateCrashed, apps.ComputeStateStopped) + + appsApi := r.m.GetMockAppsAPI() + appsApi.EXPECT().Start(mock.Anything, apps.StartAppRequest{ + Name: "my_app", + }).Return(&apps.WaitGetAppActive[apps.App]{ + Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) { + return &apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + ActiveDeployment: &apps.AppDeployment{ + DeploymentId: "active_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateInProgress, + }, + }, + PendingDeployment: &apps.AppDeployment{ + DeploymentId: "pending_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateCancelled, + }, + }, + }, nil + }, + }, nil) + + appsApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil) + + r.run(t) +} + +func 
TestAppDeployWithDeploymentInProgress(t *testing.T) {
+ ctx, b, mwc := setupBundle(t)
+
+ appApi := mwc.GetMockAppsAPI()
+ appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{
+ Name: "my_app",
+ }).Return(&apps.App{
+ Name: "my_app",
+ AppStatus: &apps.ApplicationStatus{
+ State: apps.ApplicationStateRunning,
+ },
+ ComputeStatus: &apps.ComputeStatus{
+ State: apps.ComputeStateActive,
+ },
+ }, nil).Once()
+
+ wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{
+ Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) {
+ return nil, nil
+ },
+ }
+
+ // First deployment fails
+ appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{
+ AppName: "my_app",
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app",
+ },
+ }).Return(nil, errors.New("deployment in progress")).Once()
+
+ // After first deployment fails, we should get the app and wait for the deployment to complete
+ appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{
+ Name: "my_app",
+ }).Return(&apps.App{
+ Name: "my_app",
+ ActiveDeployment: &apps.AppDeployment{
+ DeploymentId: "active_deployment_id",
+ Status: &apps.AppDeploymentStatus{
+ State: apps.AppDeploymentStateInProgress,
+ },
+ },
+ }, nil).Once()
+
+ appApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil)
+
+ // The second one should succeed
+ appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{
+ AppName: "my_app",
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app",
+ },
+ }).Return(wait, nil).Once()
+
+ r := &testAppRunner{
+ m: mwc,
+ b: b,
+ ctx: ctx,
+ }
+ r.run(t)
+}
+
+func TestStopApp(t *testing.T) {
+ ctx, b, mwc := setupBundle(t)
+ appsApi := mwc.GetMockAppsAPI()
+ appsApi.EXPECT().Stop(mock.Anything, apps.StopAppRequest{
+ Name: "my_app",
+ }).Return(&apps.WaitGetAppStopped[apps.App]{
+ Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) {
+ return &apps.App{
+ Name: "my_app",
+ AppStatus: &apps.ApplicationStatus{
+ State: apps.ApplicationStateUnavailable,
+ },
+ }, nil
+ },
+ }, nil)
+
+ r := appRunner{
+ key: "my_app",
+ bundle: b,
+ app: b.Config.Resources.Apps["my_app"],
+ }
+
+ err := r.Cancel(ctx)
+ require.NoError(t, err)
+}
diff --git a/bundle/run/job.go b/bundle/run/job.go
index b43db9184..2489ca619 100644
--- a/bundle/run/job.go
+++ b/bundle/run/job.go
@@ -3,6 +3,7 @@ package run
 import (
 "context"
 "encoding/json"
+ "errors"
 "fmt"
 "strconv"
 "time"
@@ -181,13 +182,13 @@ func (r *jobRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, e
 // callback to log progress events.
Called on every poll request progressLogger, ok := cmdio.FromContext(ctx) if !ok { - return nil, fmt.Errorf("no progress logger found") + return nil, errors.New("no progress logger found") } logProgress := logProgressCallback(ctx, progressLogger) waiter, err := w.Jobs.RunNow(ctx, *req) if err != nil { - return nil, fmt.Errorf("cannot start job") + return nil, errors.New("cannot start job") } if opts.NoWait { @@ -266,7 +267,7 @@ func (r *jobRunner) convertPythonParams(opts *Options) error { if len(opts.Job.pythonParams) > 0 { if _, ok := opts.Job.notebookParams["__python_params"]; ok { - return fmt.Errorf("can't use __python_params as notebook param, the name is reserved for internal use") + return errors.New("can't use __python_params as notebook param, the name is reserved for internal use") } p, err := json.Marshal(opts.Job.pythonParams) if err != nil { diff --git a/bundle/run/job_options.go b/bundle/run/job_options.go index 6a03dff95..7db8e72cd 100644 --- a/bundle/run/job_options.go +++ b/bundle/run/job_options.go @@ -1,7 +1,7 @@ package run import ( - "fmt" + "errors" "strconv" "github.com/databricks/cli/bundle/config/resources" @@ -60,16 +60,16 @@ func (o *JobOptions) hasJobParametersConfigured() bool { // Validate returns if the combination of options is valid. func (o *JobOptions) Validate(job *resources.Job) error { if job == nil { - return fmt.Errorf("job not defined") + return errors.New("job not defined") } // Ensure mutual exclusion on job parameters and task parameters. hasJobParams := len(job.Parameters) > 0 if hasJobParams && o.hasTaskParametersConfigured() { - return fmt.Errorf("the job to run defines job parameters; specifying task parameters is not allowed") + return errors.New("the job to run defines job parameters; specifying task parameters is not allowed") } if !hasJobParams && o.hasJobParametersConfigured() { - return fmt.Errorf("the job to run does not define job parameters; specifying job parameters is not allowed") + return errors.New("the job to run does not define job parameters; specifying job parameters is not allowed") } return nil @@ -80,7 +80,7 @@ func (o *JobOptions) validatePipelineParams() (*jobs.PipelineParams, error) { return nil, nil } - defaultErr := fmt.Errorf("job run argument --pipeline-params only supports `full_refresh=`") + defaultErr := errors.New("job run argument --pipeline-params only supports `full_refresh=`") v, ok := o.pipelineParams["full_refresh"] if !ok { return nil, defaultErr diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go index 5d19ca4ff..daf6cf063 100644 --- a/bundle/run/job_test.go +++ b/bundle/run/job_test.go @@ -1,7 +1,6 @@ package run import ( - "bytes" "context" "testing" "time" @@ -54,7 +53,7 @@ func TestConvertPythonParams(t *testing.T) { err = runner.convertPythonParams(opts) require.NoError(t, err) require.Contains(t, opts.Job.notebookParams, "__python_params") - require.Equal(t, opts.Job.notebookParams["__python_params"], `["param1","param2","param3"]`) + require.Equal(t, `["param1","param2","param3"]`, opts.Job.notebookParams["__python_params"]) } func TestJobRunnerCancel(t *testing.T) { @@ -159,8 +158,8 @@ func TestJobRunnerRestart(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) jobApi := 
m.GetMockJobsAPI() @@ -230,8 +229,8 @@ func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) jobApi := m.GetMockJobsAPI() diff --git a/bundle/run/output/job.go b/bundle/run/output/job.go index 6199ac2f7..2ac974cd5 100644 --- a/bundle/run/output/job.go +++ b/bundle/run/output/job.go @@ -47,7 +47,7 @@ func (out *JobOutput) String() (string, error) { } result.WriteString("=======\n") result.WriteString(fmt.Sprintf("Task %s:\n", v.TaskKey)) - result.WriteString(fmt.Sprintf("%s\n", taskString)) + result.WriteString(taskString + "\n") } return result.String(), nil } diff --git a/bundle/run/output/task.go b/bundle/run/output/task.go index 1ef78a8c3..53b989e88 100644 --- a/bundle/run/output/task.go +++ b/bundle/run/output/task.go @@ -2,7 +2,6 @@ package output import ( "encoding/json" - "fmt" "github.com/databricks/databricks-sdk-go/service/jobs" ) @@ -27,7 +26,7 @@ func structToString(val any) (string, error) { func (out *NotebookOutput) String() (string, error) { if out.Truncated { - return fmt.Sprintf("%s\n[truncated...]\n", out.Result), nil + return out.Result + "\n[truncated...]\n", nil } return out.Result, nil } @@ -42,7 +41,7 @@ func (out *DbtOutput) String() (string, error) { // JSON is used because it's a convenient representation. // If user needs machine parsable output, they can use the --output json // flag - return fmt.Sprintf("Dbt Task Output:\n%s", outputString), nil + return "Dbt Task Output:\n" + outputString, nil } func (out *SqlOutput) String() (string, error) { @@ -55,12 +54,12 @@ func (out *SqlOutput) String() (string, error) { // JSON is used because it's a convenient representation. // If user needs machine parsable output, they can use the --output json // flag - return fmt.Sprintf("SQL Task Output:\n%s", outputString), nil + return "SQL Task Output:\n" + outputString, nil } func (out *LogsOutput) String() (string, error) { if out.LogsTruncated { - return fmt.Sprintf("%s\n[truncated...]\n", out.Logs), nil + return out.Logs + "\n[truncated...]\n", nil } return out.Logs, nil } diff --git a/bundle/run/pipeline.go b/bundle/run/pipeline.go index a0e7d1e1e..bdcf0f142 100644 --- a/bundle/run/pipeline.go +++ b/bundle/run/pipeline.go @@ -2,6 +2,7 @@ package run import ( "context" + "errors" "fmt" "time" @@ -17,7 +18,7 @@ import ( func filterEventsByUpdateId(events []pipelines.PipelineEvent, updateId string) []pipelines.PipelineEvent { result := []pipelines.PipelineEvent{} - for i := 0; i < len(events); i++ { + for i := range events { if events[i].Origin.UpdateId == updateId { result = append(result, events[i]) } @@ -32,8 +33,8 @@ func (r *pipelineRunner) logEvent(ctx context.Context, event pipelines.PipelineE } if event.Error != nil && len(event.Error.Exceptions) > 0 { logString += "trace for most recent exception: \n" - for i := 0; i < len(event.Error.Exceptions); i++ { - logString += fmt.Sprintf("%s\n", event.Error.Exceptions[i].Message) + for i := range len(event.Error.Exceptions) { + logString += event.Error.Exceptions[i].Message + "\n" } } if logString != "" { @@ -90,11 +91,6 @@ func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutp // Include resource key in logger. 
ctx = log.NewContext(ctx, log.GetLogger(ctx).With("resource", r.Key())) w := r.bundle.WorkspaceClient() - _, err := w.Pipelines.GetByPipelineId(ctx, pipelineID) - if err != nil { - log.Warnf(ctx, "Cannot get pipeline: %s", err) - return nil, err - } req, err := opts.Pipeline.toPayload(r.pipeline, pipelineID) if err != nil { @@ -112,7 +108,7 @@ func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutp updateTracker := progress.NewUpdateTracker(pipelineID, updateID, w) progressLogger, ok := cmdio.FromContext(ctx) if !ok { - return nil, fmt.Errorf("no progress logger found") + return nil, errors.New("no progress logger found") } // Log the pipeline update URL as soon as it is available. @@ -149,7 +145,7 @@ func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutp if state == pipelines.UpdateInfoStateCanceled { log.Infof(ctx, "Update was cancelled!") - return nil, fmt.Errorf("update cancelled") + return nil, errors.New("update cancelled") } if state == pipelines.UpdateInfoStateFailed { log.Infof(ctx, "Update has failed!") @@ -157,7 +153,7 @@ func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutp if err != nil { return nil, err } - return nil, fmt.Errorf("update failed") + return nil, errors.New("update failed") } if state == pipelines.UpdateInfoStateCompleted { log.Infof(ctx, "Update has completed successfully!") diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go index 66f9d86be..56d800d35 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,7 +1,6 @@ package run import ( - "bytes" "context" "testing" "time" @@ -75,8 +74,8 @@ func TestPipelineRunnerRestart(t *testing.T) { Host: "https://test.com", } b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ @@ -90,8 +89,6 @@ func TestPipelineRunnerRestart(t *testing.T) { PipelineId: "123", }).Return(mockWait, nil) - pipelineApi.EXPECT().GetByPipelineId(mock.Anything, "123").Return(&pipelines.GetPipelineResponse{}, nil) - // Mock runner starting a new update pipelineApi.EXPECT().StartUpdate(mock.Anything, pipelines.StartUpdate{ PipelineId: "123", diff --git a/bundle/run/progress/pipeline.go b/bundle/run/progress/pipeline.go index b82dd7abd..ce92c4cde 100644 --- a/bundle/run/progress/pipeline.go +++ b/bundle/run/progress/pipeline.go @@ -33,7 +33,7 @@ func (event *ProgressEvent) String() string { // construct error string if level=`Error` if event.Level == pipelines.EventLevelError && event.Error != nil { for _, exception := range event.Error.Exceptions { - result.WriteString(fmt.Sprintf("\n%s", exception.Message)) + result.WriteString("\n" + exception.Message) } } return result.String() diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 4c907d068..23c2c0a41 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -42,7 +42,7 @@ type Runner interface { // IsRunnable returns a filter that only allows runnable resources. 
func IsRunnable(ref refs.Reference) bool { switch ref.Resource.(type) { - case *resources.Job, *resources.Pipeline: + case *resources.Job, *resources.Pipeline, *resources.App: return true default: return false @@ -56,6 +56,12 @@ func ToRunner(b *bundle.Bundle, ref refs.Reference) (Runner, error) { return &jobRunner{key: key(ref.KeyWithType), bundle: b, job: resource}, nil case *resources.Pipeline: return &pipelineRunner{key: key(ref.KeyWithType), bundle: b, pipeline: resource}, nil + case *resources.App: + return &appRunner{ + key: key(ref.KeyWithType), + bundle: b, + app: resource, + }, nil default: return nil, fmt.Errorf("unsupported resource type: %T", resource) } diff --git a/bundle/schema/embed_test.go b/bundle/schema/embed_test.go index 59f1458cb..03d2165e4 100644 --- a/bundle/schema/embed_test.go +++ b/bundle/schema/embed_test.go @@ -59,8 +59,8 @@ func TestJsonSchema(t *testing.T) { } providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider") - assert.Contains(t, providers.Enum, "gitHub") - assert.Contains(t, providers.Enum, "bitbucketCloud") - assert.Contains(t, providers.Enum, "gitHubEnterprise") - assert.Contains(t, providers.Enum, "bitbucketServer") + assert.Contains(t, providers.OneOf[0].Enum, "gitHub") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketCloud") + assert.Contains(t, providers.OneOf[0].Enum, "gitHubEnterprise") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketServer") } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 8e8efa7fc..4a3b56814 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -59,6 +59,81 @@ "cli": { "bundle": { "config": { + "resources.App": { + "oneOf": [ + { + "type": "object", + "properties": { + "active_deployment": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeployment" + }, + "app_status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus" + }, + "compute_status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus" + }, + "config": { + "$ref": "#/$defs/map/interface" + }, + "create_time": { + "$ref": "#/$defs/string" + }, + "creator": { + "$ref": "#/$defs/string" + }, + "default_source_code_path": { + "$ref": "#/$defs/string" + }, + "description": { + "$ref": "#/$defs/string" + }, + "name": { + "$ref": "#/$defs/string" + }, + "pending_deployment": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeployment" + }, + "permissions": { + "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "resources": { + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/apps.AppResource" + }, + "service_principal_client_id": { + "$ref": "#/$defs/string" + }, + "service_principal_id": { + "$ref": "#/$defs/int64" + }, + "service_principal_name": { + "$ref": "#/$defs/string" + }, + "source_code_path": { + "$ref": "#/$defs/string" + }, + "update_time": { + "$ref": "#/$defs/string" + }, + "updater": { + "$ref": "#/$defs/string" + }, + "url": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "source_code_path", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.Cluster": { "oneOf": [ { @@ -313,7 +388,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_clusters": { - 
"description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.\nIf more than 100 job clusters are available, you can paginate through them using :method:jobs/get.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" }, "max_concurrent_runs": { @@ -351,7 +426,7 @@ "$ref": "#/$defs/map/string" }, "tasks": { - "description": "A list of task specifications to be executed by this job.", + "description": "A list of task specifications to be executed by this job.\nIf more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" }, "timeout_seconds": { @@ -1051,6 +1126,7 @@ "$ref": "#/$defs/string" }, "uuid": { + "description": "Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command).", "$ref": "#/$defs/string" } }, @@ -1100,6 +1176,10 @@ "description": "The PyDABs configuration.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs" }, + "python": { + "description": "Configures loading of Python code defined with 'databricks-bundles' package.", + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Python" + }, "python_wheel_wrapper": { "description": "Whether to use a Python wheel wrapper", "$ref": "#/$defs/bool" @@ -1234,11 +1314,44 @@ } ] }, + "config.Python": { + "oneOf": [ + { + "type": "object", + "properties": { + "mutators": { + "description": "Mutators contains a list of fully qualified function paths to mutator functions.\n\nExample: [\"my_project.mutators:add_default_cluster\"]", + "$ref": "#/$defs/slice/string" + }, + "resources": { + "description": "Resources contains a list of fully qualified function paths to load resources\ndefined in Python code.\n\nExample: [\"my_project.resources:load_resources\"]", + "$ref": "#/$defs/slice/string" + }, + "venv_path": { + "description": "VEnvPath is path to the virtual environment.\n\nIf enabled, Python code will execute within this environment. 
If disabled,\nit defaults to using the Python interpreter available in the current shell.", + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "resources", + "mutators" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "config.Resources": { "oneOf": [ { "type": "object", "properties": { + "apps": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.App" + }, "clusters": { "description": "The cluster definitions for the bundle.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Cluster", @@ -1494,6 +1607,391 @@ }, "databricks-sdk-go": { "service": { + "apps.AppDeployment": { + "oneOf": [ + { + "type": "object", + "properties": { + "create_time": { + "$ref": "#/$defs/string" + }, + "creator": { + "$ref": "#/$defs/string" + }, + "deployment_artifacts": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts" + }, + "deployment_id": { + "$ref": "#/$defs/string" + }, + "mode": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode" + }, + "source_code_path": { + "$ref": "#/$defs/string" + }, + "status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus" + }, + "update_time": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentArtifacts": { + "oneOf": [ + { + "type": "object", + "properties": { + "source_code_path": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentMode": { + "oneOf": [ + { + "type": "string", + "enum": [ + "SNAPSHOT", + "AUTO_SYNC" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "SUCCEEDED", + "FAILED", + "IN_PROGRESS", + "CANCELLED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResource": { + "oneOf": [ + { + "type": "object", + "properties": { + "description": { + "$ref": "#/$defs/string" + }, + "job": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob" + }, + "name": { + "$ref": "#/$defs/string" + }, + "secret": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret" + }, + "serving_endpoint": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint" + }, + "sql_warehouse": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse" + } + }, + "additionalProperties": false, + "required": [ + "name" + ] + }, + { + "type": "string", + "pattern": 
"\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceJob": { + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission" + } + }, + "additionalProperties": false, + "required": [ + "id", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceJobJobPermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "IS_OWNER", + "CAN_MANAGE_RUN", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSecret": { + "oneOf": [ + { + "type": "object", + "properties": { + "key": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission" + }, + "scope": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "key", + "permission", + "scope" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSecretSecretPermission": { + "oneOf": [ + { + "type": "string", + "description": "Permission to grant on the secret scope. Supported permissions are: \"READ\", \"WRITE\", \"MANAGE\".", + "enum": [ + "READ", + "WRITE", + "MANAGE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceServingEndpoint": { + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission" + } + }, + "additionalProperties": false, + "required": [ + "name", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceServingEndpointServingEndpointPermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_QUERY", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSqlWarehouse": { + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission" + } + }, + "additionalProperties": false, + "required": [ + "id", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSqlWarehouseSqlWarehousePermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_USE", + "IS_OWNER" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ApplicationState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "DEPLOYING", + "RUNNING", + "CRASHED", + "UNAVAILABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ApplicationStatus": { + "oneOf": [ + { + "type": "object", + 
"properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ApplicationState" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ComputeState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "ERROR", + "DELETING", + "STARTING", + "STOPPING", + "UPDATING", + "STOPPED", + "ACTIVE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ComputeStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ComputeState" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "catalog.MonitorCronSchedule": { "oneOf": [ { @@ -1525,11 +2023,19 @@ ] }, "catalog.MonitorCronSchedulePauseStatus": { - "type": "string", - "description": "Read only field that indicates whether a schedule is paused or not.", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "description": "Read only field that indicates whether a schedule is paused or not.", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorDataClassificationConfig": { @@ -1618,11 +2124,19 @@ ] }, "catalog.MonitorInferenceLogProblemType": { - "type": "string", - "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", - "enum": [ - "PROBLEM_TYPE_CLASSIFICATION", - "PROBLEM_TYPE_REGRESSION" + "oneOf": [ + { + "type": "string", + "description": "Problem type the model aims to solve. 
Determines the type of model-quality metrics that will be computed.", + "enum": [ + "PROBLEM_TYPE_CLASSIFICATION", + "PROBLEM_TYPE_REGRESSION" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorMetric": { @@ -1667,12 +2181,20 @@ ] }, "catalog.MonitorMetricType": { - "type": "string", - "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", - "enum": [ - "CUSTOM_METRIC_TYPE_AGGREGATE", - "CUSTOM_METRIC_TYPE_DERIVED", - "CUSTOM_METRIC_TYPE_DRIFT" + "oneOf": [ + { + "type": "string", + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", + "enum": [ + "CUSTOM_METRIC_TYPE_AGGREGATE", + "CUSTOM_METRIC_TYPE_DERIVED", + "CUSTOM_METRIC_TYPE_DRIFT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorNotifications": { @@ -1736,10 +2258,18 @@ ] }, "catalog.VolumeType": { - "type": "string", - "enum": [ - "EXTERNAL", - "MANAGED" + "oneOf": [ + { + "type": "string", + "enum": [ + "EXTERNAL", + "MANAGED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Adlsgen2Info": { @@ -1838,12 +2368,20 @@ ] }, "compute.AwsAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", - "enum": [ - "SPOT", - "ON_DEMAND", - "SPOT_WITH_FALLBACK" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", + "enum": [ + "SPOT", + "ON_DEMAND", + "SPOT_WITH_FALLBACK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.AzureAttributes": { @@ -1876,12 +2414,20 @@ ] }, "compute.AzureAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", - 
"enum": [ - "SPOT_AZURE", - "ON_DEMAND_AZURE", - "SPOT_WITH_FALLBACK_AZURE" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", + "enum": [ + "SPOT_AZURE", + "ON_DEMAND_AZURE", + "SPOT_WITH_FALLBACK_AZURE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.ClientsTypes": { @@ -2058,19 +2604,27 @@ ] }, "compute.DataSecurityMode": { - "type": "string", - "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", - "enum": [ - "DATA_SECURITY_MODE_AUTO", - "DATA_SECURITY_MODE_STANDARD", - "DATA_SECURITY_MODE_DEDICATED", - "NONE", - "SINGLE_USER", - "USER_ISOLATION", - "LEGACY_TABLE_ACL", - "LEGACY_PASSTHROUGH", - "LEGACY_SINGLE_USER", - "LEGACY_SINGLE_USER_STANDARD" + "oneOf": [ + { + "type": "string", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. 
Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", + "enum": [ + "DATA_SECURITY_MODE_AUTO", + "DATA_SECURITY_MODE_STANDARD", + "DATA_SECURITY_MODE_DEDICATED", + "NONE", + "SINGLE_USER", + "USER_ISOLATION", + "LEGACY_TABLE_ACL", + "LEGACY_PASSTHROUGH", + "LEGACY_SINGLE_USER", + "LEGACY_SINGLE_USER_STANDARD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.DbfsStorageInfo": { @@ -2138,11 +2692,19 @@ ] }, "compute.EbsVolumeType": { - "type": "string", - "description": "The type of EBS volumes that will be launched with this cluster.", - "enum": [ - "GENERAL_PURPOSE_SSD", - "THROUGHPUT_OPTIMIZED_HDD" + "oneOf": [ + { + "type": "string", + "description": "The type of EBS volumes that will be launched with this cluster.", + "enum": [ + "GENERAL_PURPOSE_SSD", + "THROUGHPUT_OPTIMIZED_HDD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Environment": { @@ -2209,12 +2771,20 @@ ] }, "compute.GcpAvailability": { - "type": "string", - "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", - "enum": [ - "PREEMPTIBLE_GCP", - "ON_DEMAND_GCP", - "PREEMPTIBLE_WITH_FALLBACK_GCP" + "oneOf": [ + { + "type": "string", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "enum": [ + "PREEMPTIBLE_GCP", + "ON_DEMAND_GCP", + "PREEMPTIBLE_WITH_FALLBACK_GCP" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.GcsStorageInfo": { @@ -2448,12 +3018,20 @@ ] }, "compute.RuntimeEngine": { - "type": "string", - "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", - "enum": [ - "NULL", - "STANDARD", - "PHOTON" + "oneOf": [ + { + "type": "string", + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", + "enum": [ 
+ "NULL", + "STANDARD", + "PHOTON" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.S3StorageInfo": { @@ -2565,10 +3143,18 @@ ] }, "dashboards.LifecycleState": { - "type": "string", - "enum": [ - "ACTIVE", - "TRASHED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ACTIVE", + "TRASHED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.CleanRoomsNotebookTask": { @@ -2606,10 +3192,18 @@ ] }, "jobs.Condition": { - "type": "string", - "enum": [ - "ANY_UPDATED", - "ALL_UPDATED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ANY_UPDATED", + "ALL_UPDATED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.ConditionTask": { @@ -2644,15 +3238,23 @@ ] }, "jobs.ConditionTaskOp": { - "type": "string", - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", - "enum": [ - "EQUAL_TO", - "GREATER_THAN", - "GREATER_THAN_OR_EQUAL", - "LESS_THAN", - "LESS_THAN_OR_EQUAL", - "NOT_EQUAL" + "oneOf": [ + { + "type": "string", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. 
If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "enum": [ + "EQUAL_TO", + "GREATER_THAN", + "GREATER_THAN_OR_EQUAL", + "LESS_THAN", + "LESS_THAN_OR_EQUAL", + "NOT_EQUAL" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.Continuous": { @@ -2808,23 +3410,39 @@ ] }, "jobs.Format": { - "type": "string", - "enum": [ - "SINGLE_TASK", - "MULTI_TASK" + "oneOf": [ + { + "type": "string", + "enum": [ + "SINGLE_TASK", + "MULTI_TASK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitProvider": { - "type": "string", - "enum": [ - "gitHub", - "bitbucketCloud", - "azureDevOpsServices", - "gitHubEnterprise", - "bitbucketServer", - "gitLab", - "gitLabEnterpriseEdition", - "awsCodeCommit" + "oneOf": [ + { + "type": "string", + "enum": [ + "gitHub", + "bitbucketCloud", + "azureDevOpsServices", + "gitHubEnterprise", + "bitbucketServer", + "gitLab", + "gitLabEnterpriseEdition", + "awsCodeCommit" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitSnapshot": { @@ -2937,18 +3555,34 @@ ] }, "jobs.JobDeploymentKind": { - "type": "string", - "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEditMode": { - "type": "string", - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", - "enum": [ - "UI_LOCKED", - "EDITABLE" + "oneOf": [ + { + "type": "string", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "enum": [ + "UI_LOCKED", + "EDITABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEmailNotifications": { @@ -3116,29 +3750,53 @@ ] }, "jobs.JobSourceDirtyState": { - "type": "string", - "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", - "enum": [ - "NOT_SYNCED", - "DISCONNECTED" + "oneOf": [ + { + "type": "string", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. 
Import the remote job specification again from UI to make the job fully synced.", + "enum": [ + "NOT_SYNCED", + "DISCONNECTED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthMetric": { - "type": "string", - "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.", - "enum": [ - "RUN_DURATION_SECONDS", - "STREAMING_BACKLOG_BYTES", - "STREAMING_BACKLOG_RECORDS", - "STREAMING_BACKLOG_SECONDS", - "STREAMING_BACKLOG_FILES" + "oneOf": [ + { + "type": "string", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. 
This metric is in Public Preview.", + "enum": [ + "RUN_DURATION_SECONDS", + "STREAMING_BACKLOG_BYTES", + "STREAMING_BACKLOG_RECORDS", + "STREAMING_BACKLOG_SECONDS", + "STREAMING_BACKLOG_FILES" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthOperator": { - "type": "string", - "description": "Specifies the operator used to compare the health metric value with the specified threshold.", - "enum": [ - "GREATER_THAN" + "oneOf": [ + { + "type": "string", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "enum": [ + "GREATER_THAN" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthRule": { @@ -3222,10 +3880,18 @@ ] }, "jobs.PauseStatus": { - "type": "string", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PeriodicTriggerConfiguration": { @@ -3255,11 +3921,19 @@ ] }, "jobs.PeriodicTriggerConfigurationTimeUnit": { - "type": "string", - "enum": [ - "HOURS", - "DAYS", - "WEEKS" + "oneOf": [ + { + "type": "string", + "enum": [ + "HOURS", + "DAYS", + "WEEKS" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PipelineParams": { @@ -3361,15 +4035,23 @@ ] }, "jobs.RunIf": { - "type": "string", - "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", - "enum": [ - "ALL_SUCCESS", - "ALL_DONE", - "NONE_FAILED", - "AT_LEAST_ONE_SUCCESS", - "ALL_FAILED", - "AT_LEAST_ONE_FAILED" + "oneOf": [ + { + "type": "string", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", + "enum": [ + "ALL_SUCCESS", + "ALL_DONE", + "NONE_FAILED", + "AT_LEAST_ONE_SUCCESS", + "ALL_FAILED", + "AT_LEAST_ONE_FAILED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.RunJobTask": { @@ -3429,11 +4111,19 @@ ] }, "jobs.Source": { - "type": "string", - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", - "enum": [ - "WORKSPACE", - "GIT" + "oneOf": [ + { + "type": "string", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "enum": [ + "WORKSPACE", + "GIT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.SparkJarTask": { @@ -4133,12 +4823,20 @@ ] }, "ml.ModelVersionStatus": { - "type": "string", - "description": "Current status of `model_version`", - "enum": [ - "PENDING_REGISTRATION", - "FAILED_REGISTRATION", - "READY" + "oneOf": [ + { + "type": "string", + "description": "Current status of `model_version`", + "enum": [ + "PENDING_REGISTRATION", + "FAILED_REGISTRATION", + "READY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "ml.ModelVersionTag": { @@ -4183,11 +4881,40 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "string", + "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", + "enum": [ + "MONDAY", + "TUESDAY", + "WEDNESDAY", + "THURSDAY", + "FRIDAY", + "SATURDAY", + "SUNDAY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.DeploymentKind": { - "type": "string", - "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.FileLibrary": { @@ -4493,11 +5220,19 @@ ] }, "pipelines.PipelineClusterAutoscaleMode": { - "type": "string", - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n", - "enum": [ - "ENHANCED", - "LEGACY" + "oneOf": [ + { + "type": "string", + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.\n", + "enum": [ + "ENHANCED", + "LEGACY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.PipelineDeployment": { @@ -4617,7 +5352,7 @@ "properties": { "days_of_week": { "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" }, "start_hour": { "description": "An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day.\nContinuous pipeline restart is triggered only within a five-hour window starting at this hour.", @@ -4639,19 +5374,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "type": "string", - "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "enum": [ - "MONDAY", - "TUESDAY", - "WEDNESDAY", - "THURSDAY", - "FRIDAY", - "SATURDAY", - "SUNDAY" - ] - }, "pipelines.SchemaSpec": { "oneOf": [ { @@ -4759,11 +5481,19 @@ ] }, "pipelines.TableSpecificConfigScdType": { - "type": "string", - "description": "The SCD type to use to ingest the table.", - "enum": [ - "SCD_TYPE_1", - "SCD_TYPE_2" + "oneOf": [ + { + "type": "string", + "description": "The SCD type to use to ingest the table.", + "enum": [ + "SCD_TYPE_1", + "SCD_TYPE_2" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Ai21LabsConfig": { @@ -4772,9 +5502,11 @@ "type": "object", "properties": { "ai21labs_api_key": { + "description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.", "$ref": "#/$defs/string" }, "ai21labs_api_key_plaintext": { + "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -4868,11 +5600,19 @@ ] }, "serving.AiGatewayGuardrailPiiBehaviorBehavior": { - "type": "string", - "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", - "enum": [ - "NONE", - "BLOCK" + "oneOf": [ + { + "type": "string", + "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. 
If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", + "enum": [ + "NONE", + "BLOCK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayGuardrails": { @@ -4958,18 +5698,34 @@ ] }, "serving.AiGatewayRateLimitKey": { - "type": "string", - "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayRateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a rate limit. Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "description": "Renewal period field for a rate limit. Currently, only 'minute' is supported.", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayUsageTrackingConfig": { @@ -5033,13 +5789,21 @@ ] }, "serving.AmazonBedrockConfigBedrockProvider": { - "type": "string", - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", - "enum": [ - "anthropic", - "cohere", - "ai21labs", - "amazon" + "oneOf": [ + { + "type": "string", + "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "enum": [ + "anthropic", + "cohere", + "ai21labs", + "amazon" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AnthropicConfig": { @@ -5268,17 +6032,25 @@ ] }, "serving.ExternalModelProvider": { - "type": "string", - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", - "enum": [ - "ai21labs", - "anthropic", - "amazon-bedrock", - "cohere", - "databricks-model-serving", - "google-cloud-vertex-ai", - "openai", - "palm" + "oneOf": [ + { + "type": "string", + "description": "The name of the provider for the external model. 
Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", + "enum": [ + "ai21labs", + "anthropic", + "amazon-bedrock", + "cohere", + "databricks-model-serving", + "google-cloud-vertex-ai", + "openai", + "palm" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.GoogleCloudVertexAiConfig": { @@ -5287,15 +6059,19 @@ "type": "object", "properties": { "private_key": { + "description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`", "$ref": "#/$defs/string" }, "private_key_plaintext": { + "description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`.", "$ref": "#/$defs/string" }, "project_id": { + "description": "This is the Google Cloud project id that the service account is associated with.", "$ref": "#/$defs/string" }, "region": { + "description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. 
Some models are only available in specific regions.", "$ref": "#/$defs/string" } }, @@ -5313,36 +6089,47 @@ "type": "object", "properties": { "microsoft_entra_client_id": { + "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n", "$ref": "#/$defs/string" }, "microsoft_entra_client_secret": { + "description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n", "$ref": "#/$defs/string" }, "microsoft_entra_client_secret_plaintext": { + "description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n", "$ref": "#/$defs/string" }, "microsoft_entra_tenant_id": { + "description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n", "$ref": "#/$defs/string" }, "openai_api_base": { + "description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n", "$ref": "#/$defs/string" }, "openai_api_key": { + "description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.", "$ref": "#/$defs/string" }, "openai_api_key_plaintext": { + "description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.", "$ref": "#/$defs/string" }, "openai_api_type": { + "description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. 
For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n", "$ref": "#/$defs/string" }, "openai_api_version": { + "description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n", "$ref": "#/$defs/string" }, "openai_deployment_name": { + "description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n", "$ref": "#/$defs/string" }, "openai_organization": { + "description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n", "$ref": "#/$defs/string" } }, @@ -5360,9 +6147,11 @@ "type": "object", "properties": { "palm_api_key": { + "description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.", "$ref": "#/$defs/string" }, "palm_api_key_plaintext": { + "description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.", "$ref": "#/$defs/string" } }, @@ -5405,18 +6194,34 @@ ] }, "serving.RateLimitKey": { - "type": "string", - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.RateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Route": { @@ -5563,23 +6368,39 @@ ] }, "serving.ServedModelInputWorkloadSize": { - "type": "string", - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", - "enum": [ - "Small", - "Medium", - "Large" + "oneOf": [ + { + "type": "string", + "description": "The workload size of the served model. 
The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", + "enum": [ + "Small", + "Medium", + "Large" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.ServedModelInputWorkloadType": { - "type": "string", - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", - "enum": [ - "CPU", - "GPU_SMALL", - "GPU_MEDIUM", - "GPU_LARGE", - "MULTIGPU_MEDIUM" + "oneOf": [ + { + "type": "string", + "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "enum": [ + "CPU", + "GPU_SMALL", + "GPU_MEDIUM", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.TrafficConfig": { @@ -5665,6 +6486,20 @@ "cli": { "bundle": { "config": { + "resources.App": { + "oneOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.App" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.Cluster": { "oneOf": [ { @@ -5894,6 +6729,20 @@ } } }, + "interface": { + "oneOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/interface" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "string": { "oneOf": [ { @@ -5962,6 +6811,20 @@ }, "databricks-sdk-go": { "service": { + "apps.AppResource": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResource" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "catalog.MonitorMetric": { "oneOf": [ { @@ -6172,6 +7035,20 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.IngestionConfig": { "oneOf": [ { @@ -6228,20 +7105,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "oneOf": [ - { - "type": "array", - "items": { - "$ref": 
"#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" - } - }, - { - "type": "string", - "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" - } - ] - }, "serving.AiGatewayRateLimit": { "oneOf": [ { @@ -6406,5 +7269,5 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" } }, - "additionalProperties": false + "additionalProperties": {} } \ No newline at end of file diff --git a/bundle/tests/apps/databricks.yml b/bundle/tests/apps/databricks.yml new file mode 100644 index 000000000..ad7e93006 --- /dev/null +++ b/bundle/tests/apps/databricks.yml @@ -0,0 +1,71 @@ +bundle: + name: apps + +workspace: + host: https://acme.cloud.databricks.com/ + +variables: + app_config: + type: complex + default: + command: + - "python" + - "app.py" + env: + - name: SOME_ENV_VARIABLE + value: "Some value" + +resources: + apps: + my_app: + name: "my-app" + description: "My App" + source_code_path: ./app + config: ${var.app_config} + + resources: + - name: "my-sql-warehouse" + sql_warehouse: + id: 1234 + permission: "CAN_USE" + - name: "my-job" + job: + id: 5678 + permission: "CAN_MANAGE_RUN" + permissions: + - user_name: "foo@bar.com" + level: "CAN_VIEW" + - service_principal_name: "my_sp" + level: "CAN_MANAGE" + + +targets: + default: + + development: + variables: + app_config: + command: + - "python" + - "dev.py" + env: + - name: SOME_ENV_VARIABLE_2 + value: "Some value 2" + resources: + apps: + my_app: + source_code_path: ./app-dev + resources: + - name: "my-sql-warehouse" + sql_warehouse: + id: 1234 + permission: "CAN_MANAGE" + - name: "my-job" + job: + id: 5678 + permission: "CAN_MANAGE" + - name: "my-secret" + secret: + key: "key" + scope: "scope" + permission: "CAN_USE" diff --git a/bundle/tests/apps_test.go b/bundle/tests/apps_test.go new file mode 100644 index 000000000..7fee60d14 --- /dev/null +++ b/bundle/tests/apps_test.go @@ -0,0 +1,60 @@ +package config_tests + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/stretchr/testify/assert" +) + +func TestApps(t *testing.T) { + b := load(t, "./apps") + assert.Equal(t, "apps", b.Config.Bundle.Name) + + diags := bundle.Apply(context.Background(), b, + bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + )) + assert.Empty(t, diags) + + app := b.Config.Resources.Apps["my_app"] + assert.Equal(t, "my-app", app.Name) + assert.Equal(t, "My App", app.Description) + assert.Equal(t, []any{"python", "app.py"}, app.Config["command"]) + assert.Equal(t, []any{map[string]any{"name": "SOME_ENV_VARIABLE", "value": "Some value"}}, app.Config["env"]) + + assert.Len(t, app.Resources, 2) + assert.Equal(t, "1234", app.Resources[0].SqlWarehouse.Id) + assert.Equal(t, "CAN_USE", string(app.Resources[0].SqlWarehouse.Permission)) + assert.Equal(t, "5678", app.Resources[1].Job.Id) + assert.Equal(t, "CAN_MANAGE_RUN", string(app.Resources[1].Job.Permission)) +} + +func TestAppsOverride(t *testing.T) { + b := loadTarget(t, "./apps", "development") + assert.Equal(t, "apps", b.Config.Bundle.Name) + + diags := bundle.Apply(context.Background(), b, + bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + )) + assert.Empty(t, diags) + app := b.Config.Resources.Apps["my_app"] + assert.Equal(t, "my-app", app.Name) + assert.Equal(t, "My App", app.Description) + assert.Equal(t, []any{"python", "dev.py"}, app.Config["command"]) + assert.Equal(t, 
[]any{map[string]any{"name": "SOME_ENV_VARIABLE_2", "value": "Some value 2"}}, app.Config["env"]) + + assert.Len(t, app.Resources, 3) + assert.Equal(t, "1234", app.Resources[0].SqlWarehouse.Id) + assert.Equal(t, "CAN_MANAGE", string(app.Resources[0].SqlWarehouse.Permission)) + assert.Equal(t, "5678", app.Resources[1].Job.Id) + assert.Equal(t, "CAN_MANAGE", string(app.Resources[1].Job.Permission)) + assert.Equal(t, "key", app.Resources[2].Secret.Key) + assert.Equal(t, "scope", app.Resources[2].Secret.Scope) + assert.Equal(t, "CAN_USE", string(app.Resources[2].Secret.Permission)) +} diff --git a/bundle/tests/clusters_test.go b/bundle/tests/clusters_test.go deleted file mode 100644 index def8a2a31..000000000 --- a/bundle/tests/clusters_test.go +++ /dev/null @@ -1,36 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestClusters(t *testing.T) { - b := load(t, "./clusters") - assert.Equal(t, "clusters", b.Config.Bundle.Name) - - cluster := b.Config.Resources.Clusters["foo"] - assert.Equal(t, "foo", cluster.ClusterName) - assert.Equal(t, "13.3.x-scala2.12", cluster.SparkVersion) - assert.Equal(t, "i3.xlarge", cluster.NodeTypeId) - assert.Equal(t, 2, cluster.NumWorkers) - assert.Equal(t, "2g", cluster.SparkConf["spark.executor.memory"]) - assert.Equal(t, 2, cluster.Autoscale.MinWorkers) - assert.Equal(t, 7, cluster.Autoscale.MaxWorkers) -} - -func TestClustersOverride(t *testing.T) { - b := loadTarget(t, "./clusters", "development") - assert.Equal(t, "clusters", b.Config.Bundle.Name) - - cluster := b.Config.Resources.Clusters["foo"] - assert.Equal(t, "foo-override", cluster.ClusterName) - assert.Equal(t, "15.2.x-scala2.12", cluster.SparkVersion) - assert.Equal(t, "m5.xlarge", cluster.NodeTypeId) - assert.Equal(t, 3, cluster.NumWorkers) - assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory"]) - assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory2"]) - assert.Equal(t, 1, cluster.Autoscale.MinWorkers) - assert.Equal(t, 3, cluster.Autoscale.MaxWorkers) -} diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go deleted file mode 100644 index e68823c33..000000000 --- a/bundle/tests/complex_variables_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package config_tests - -import ( - "context" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/stretchr/testify/require" -) - -func TestComplexVariables(t *testing.T) { - b, diags := loadTargetWithDiags("variables/complex", "default") - require.Empty(t, diags) - - diags = bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferencesInComplexVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - - require.Equal(t, "13.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, "some-policy-id", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.PolicyId) - require.Equal(t, 2, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) - require.Equal(t, "true", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) - require.Equal(t, "true", 
b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.random"]) - - require.Equal(t, 3, len(b.Config.Resources.Jobs["my_job"].Tasks[0].Libraries)) - require.Contains(t, b.Config.Resources.Jobs["my_job"].Tasks[0].Libraries, compute.Library{ - Jar: "/path/to/jar", - }) - require.Contains(t, b.Config.Resources.Jobs["my_job"].Tasks[0].Libraries, compute.Library{ - Egg: "/path/to/egg", - }) - require.Contains(t, b.Config.Resources.Jobs["my_job"].Tasks[0].Libraries, compute.Library{ - Whl: "/path/to/whl", - }) - - require.Equal(t, "task with spark version 13.2.x-scala2.11 and jar /path/to/jar", b.Config.Resources.Jobs["my_job"].Tasks[0].TaskKey) -} - -func TestComplexVariablesOverride(t *testing.T) { - b, diags := loadTargetWithDiags("variables/complex", "dev") - require.Empty(t, diags) - - diags = bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferencesInComplexVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - - require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) - require.Equal(t, "Standard_DS3_v3", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) - require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) - - // Making sure the variable is overriden and not merged / extended - // These properties are set in the default target but not set in override target - // So they should be empty - require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.random"]) - require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.PolicyId) -} - -func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { - b, diags := loadTargetWithDiags("variables/complex_multiple_files", "dev") - require.Empty(t, diags) - - diags = bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferencesInComplexVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - for _, cluster := range b.Config.Resources.Jobs["my_job"].JobClusters { - require.Equalf(t, "14.2.x-scala2.11", cluster.NewCluster.SparkVersion, "cluster: %v", cluster.JobClusterKey) - require.Equalf(t, "Standard_DS3_v2", cluster.NewCluster.NodeTypeId, "cluster: %v", cluster.JobClusterKey) - require.Equalf(t, 4, cluster.NewCluster.NumWorkers, "cluster: %v", cluster.JobClusterKey) - require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) - } -} - -func TestComplexVariablesOverrideWithFullSyntax(t *testing.T) { - b, diags := loadTargetWithDiags("variables/complex", "dev") - require.Empty(t, diags) - - diags = bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferencesInComplexVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - require.Empty(t, diags) - - complexvar := b.Config.Variables["complexvar"].Value - require.Equal(t, map[string]any{"key1": "1", "key2": "2", "key3": "3"}, complexvar) -} diff --git a/bundle/tests/environment_git_test.go b/bundle/tests/environment_git_test.go index d4695c78d..848b972b1 100644 --- a/bundle/tests/environment_git_test.go +++ 
b/bundle/tests/environment_git_test.go @@ -2,7 +2,6 @@ package config_tests import ( "context" - "fmt" "strings" "testing" @@ -16,7 +15,7 @@ func TestGitAutoLoadWithEnvironment(t *testing.T) { bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) assert.True(t, b.Config.Bundle.Git.Inferred) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") - assert.True(t, validUrl, fmt.Sprintf("Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL)) + assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } func TestGitManuallySetBranchWithEnvironment(t *testing.T) { @@ -25,5 +24,5 @@ func TestGitManuallySetBranchWithEnvironment(t *testing.T) { assert.False(t, b.Config.Bundle.Git.Inferred) assert.Equal(t, "main", b.Config.Bundle.Git.Branch) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") - assert.True(t, validUrl, fmt.Sprintf("Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL)) + assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } diff --git a/bundle/tests/environment_overrides_test.go b/bundle/tests/environment_overrides_test.go index 4a1115048..b68b083ff 100644 --- a/bundle/tests/environment_overrides_test.go +++ b/bundle/tests/environment_overrides_test.go @@ -21,8 +21,8 @@ func TestEnvironmentOverridesResourcesDev(t *testing.T) { assert.Equal(t, "base job", b.Config.Resources.Jobs["job1"].Name) // Base values are preserved in the development environment. - assert.Equal(t, true, b.Config.Resources.Pipelines["boolean1"].Photon) - assert.Equal(t, false, b.Config.Resources.Pipelines["boolean2"].Photon) + assert.True(t, b.Config.Resources.Pipelines["boolean1"].Photon) + assert.False(t, b.Config.Resources.Pipelines["boolean2"].Photon) } func TestEnvironmentOverridesResourcesStaging(t *testing.T) { @@ -30,6 +30,6 @@ func TestEnvironmentOverridesResourcesStaging(t *testing.T) { assert.Equal(t, "staging job", b.Config.Resources.Jobs["job1"].Name) // Override values are applied in the staging environment. 
- assert.Equal(t, false, b.Config.Resources.Pipelines["boolean1"].Photon) - assert.Equal(t, true, b.Config.Resources.Pipelines["boolean2"].Photon) + assert.False(t, b.Config.Resources.Pipelines["boolean1"].Photon) + assert.True(t, b.Config.Resources.Pipelines["boolean2"].Photon) } diff --git a/bundle/tests/environments_job_and_pipeline_test.go b/bundle/tests/environments_job_and_pipeline_test.go index 218d2e470..423b14c07 100644 --- a/bundle/tests/environments_job_and_pipeline_test.go +++ b/bundle/tests/environments_job_and_pipeline_test.go @@ -10,11 +10,11 @@ import ( func TestJobAndPipelineDevelopmentWithEnvironment(t *testing.T) { b := loadTarget(t, "./environments_job_and_pipeline", "development") - assert.Len(t, b.Config.Resources.Jobs, 0) + assert.Empty(t, b.Config.Resources.Jobs) assert.Len(t, b.Config.Resources.Pipelines, 1) p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] - assert.Equal(t, b.Config.Bundle.Mode, config.Development) + assert.Equal(t, config.Development, b.Config.Bundle.Mode) assert.True(t, p.Development) require.Len(t, p.Libraries, 1) assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path) @@ -23,7 +23,7 @@ func TestJobAndPipelineDevelopmentWithEnvironment(t *testing.T) { func TestJobAndPipelineStagingWithEnvironment(t *testing.T) { b := loadTarget(t, "./environments_job_and_pipeline", "staging") - assert.Len(t, b.Config.Resources.Jobs, 0) + assert.Empty(t, b.Config.Resources.Jobs) assert.Len(t, b.Config.Resources.Pipelines, 1) p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] diff --git a/bundle/tests/git_test.go b/bundle/tests/git_test.go index dec6c268a..41293e450 100644 --- a/bundle/tests/git_test.go +++ b/bundle/tests/git_test.go @@ -2,7 +2,6 @@ package config_tests import ( "context" - "fmt" "strings" "testing" @@ -17,7 +16,7 @@ func TestGitAutoLoad(t *testing.T) { bundle.Apply(context.Background(), b, mutator.LoadGitDetails()) assert.True(t, b.Config.Bundle.Git.Inferred) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") - assert.True(t, validUrl, fmt.Sprintf("Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL)) + assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } func TestGitManuallySetBranch(t *testing.T) { @@ -26,7 +25,7 @@ func TestGitManuallySetBranch(t *testing.T) { assert.False(t, b.Config.Bundle.Git.Inferred) assert.Equal(t, "main", b.Config.Bundle.Git.Branch) validUrl := strings.Contains(b.Config.Bundle.Git.OriginURL, "/cli") || strings.Contains(b.Config.Bundle.Git.OriginURL, "/bricks") - assert.True(t, validUrl, fmt.Sprintf("Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL)) + assert.True(t, validUrl, "Expected URL to contain '/cli' or '/bricks', got %s", b.Config.Bundle.Git.OriginURL) } func TestGitBundleBranchValidation(t *testing.T) { diff --git a/bundle/tests/issue_1828_test.go b/bundle/tests/issue_1828_test.go index 5f2becce5..31fcfeb8e 100644 --- a/bundle/tests/issue_1828_test.go +++ b/bundle/tests/issue_1828_test.go @@ -35,7 +35,7 @@ func TestIssue1828(t *testing.T) { } if assert.Contains(t, b.Config.Variables, "float") { - assert.Equal(t, 3.14, b.Config.Variables["float"].Default) + assert.InDelta(t, 3.14, b.Config.Variables["float"].Default, 0.0001) } if assert.Contains(t, b.Config.Variables, "time") { @@ -43,6 +43,6 @@ func TestIssue1828(t *testing.T) { } if assert.Contains(t, b.Config.Variables, "nil") { - 
assert.Equal(t, nil, b.Config.Variables["nil"].Default) + assert.Nil(t, b.Config.Variables["nil"].Default) } } diff --git a/bundle/tests/job_and_pipeline_test.go b/bundle/tests/job_and_pipeline_test.go index 65aa5bdc4..408e3e3ef 100644 --- a/bundle/tests/job_and_pipeline_test.go +++ b/bundle/tests/job_and_pipeline_test.go @@ -10,11 +10,11 @@ import ( func TestJobAndPipelineDevelopment(t *testing.T) { b := loadTarget(t, "./job_and_pipeline", "development") - assert.Len(t, b.Config.Resources.Jobs, 0) + assert.Empty(t, b.Config.Resources.Jobs) assert.Len(t, b.Config.Resources.Pipelines, 1) p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] - assert.Equal(t, b.Config.Bundle.Mode, config.Development) + assert.Equal(t, config.Development, b.Config.Bundle.Mode) assert.True(t, p.Development) require.Len(t, p.Libraries, 1) assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path) @@ -23,7 +23,7 @@ func TestJobAndPipelineDevelopment(t *testing.T) { func TestJobAndPipelineStaging(t *testing.T) { b := loadTarget(t, "./job_and_pipeline", "staging") - assert.Len(t, b.Config.Resources.Jobs, 0) + assert.Empty(t, b.Config.Resources.Jobs) assert.Len(t, b.Config.Resources.Pipelines, 1) p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] diff --git a/bundle/tests/job_cluster_key_test.go b/bundle/tests/job_cluster_key_test.go index 5a8b368e5..6a08da89c 100644 --- a/bundle/tests/job_cluster_key_test.go +++ b/bundle/tests/job_cluster_key_test.go @@ -16,13 +16,13 @@ func TestJobClusterKeyNotDefinedTest(t *testing.T) { diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), validate.JobClusterKeyDefined()) require.Len(t, diags, 1) require.NoError(t, diags.Error()) - require.Equal(t, diags[0].Severity, diag.Warning) - require.Equal(t, diags[0].Summary, "job_cluster_key key is not defined") + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "job_cluster_key key is not defined", diags[0].Summary) } func TestJobClusterKeyDefinedTest(t *testing.T) { b := loadTarget(t, "./job_cluster_key", "development") diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), validate.JobClusterKeyDefined()) - require.Len(t, diags, 0) + require.Empty(t, diags) } diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 5c48d81cb..9b246b7cc 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/experimental/mocks" @@ -46,6 +47,7 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { mutator.MergeJobParameters(), mutator.MergeJobTasks(), mutator.MergePipelineClusters(), + mutator.MergeApps(), )) return b, diags } @@ -66,7 +68,7 @@ func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diag b := load(t, path) configureMock(t, b) - ctx := context.Background() + ctx := dbr.MockRuntime(context.Background(), false) diags := bundle.Apply(ctx, b, bundle.Seq( mutator.SelectTarget(env), phases.Initialize(), diff --git a/bundle/tests/model_serving_endpoint_test.go b/bundle/tests/model_serving_endpoint_test.go index b8b800863..f779a07e6 100644 --- a/bundle/tests/model_serving_endpoint_test.go +++ b/bundle/tests/model_serving_endpoint_test.go @@ -20,7 +20,7 @@ func assertExpected(t *testing.T, p 
*resources.ModelServingEndpoint) { func TestModelServingEndpointDevelopment(t *testing.T) { b := loadTarget(t, "./model_serving_endpoint", "development") assert.Len(t, b.Config.Resources.ModelServingEndpoints, 1) - assert.Equal(t, b.Config.Bundle.Mode, config.Development) + assert.Equal(t, config.Development, b.Config.Bundle.Mode) p := b.Config.Resources.ModelServingEndpoints["my_model_serving_endpoint"] assert.Equal(t, "my-dev-endpoint", p.Name) diff --git a/bundle/tests/override_job_cluster_test.go b/bundle/tests/override_job_cluster_test.go deleted file mode 100644 index 1393e03e5..000000000 --- a/bundle/tests/override_job_cluster_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestOverrideJobClusterDev(t *testing.T) { - b := loadTarget(t, "./override_job_cluster", "development") - assert.Equal(t, "job", b.Config.Resources.Jobs["foo"].Name) - assert.Len(t, b.Config.Resources.Jobs["foo"].JobClusters, 1) - - c := b.Config.Resources.Jobs["foo"].JobClusters[0] - assert.Equal(t, "13.3.x-scala2.12", c.NewCluster.SparkVersion) - assert.Equal(t, "i3.xlarge", c.NewCluster.NodeTypeId) - assert.Equal(t, 1, c.NewCluster.NumWorkers) -} - -func TestOverrideJobClusterStaging(t *testing.T) { - b := loadTarget(t, "./override_job_cluster", "staging") - assert.Equal(t, "job", b.Config.Resources.Jobs["foo"].Name) - assert.Len(t, b.Config.Resources.Jobs["foo"].JobClusters, 1) - - c := b.Config.Resources.Jobs["foo"].JobClusters[0] - assert.Equal(t, "13.3.x-scala2.12", c.NewCluster.SparkVersion) - assert.Equal(t, "i3.2xlarge", c.NewCluster.NodeTypeId) - assert.Equal(t, 4, c.NewCluster.NumWorkers) -} diff --git a/bundle/tests/override_job_tasks_test.go b/bundle/tests/override_job_tasks_test.go deleted file mode 100644 index 82da04da2..000000000 --- a/bundle/tests/override_job_tasks_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestOverrideTasksDev(t *testing.T) { - b := loadTarget(t, "./override_job_tasks", "development") - assert.Equal(t, "job", b.Config.Resources.Jobs["foo"].Name) - assert.Len(t, b.Config.Resources.Jobs["foo"].Tasks, 2) - - tasks := b.Config.Resources.Jobs["foo"].Tasks - assert.Equal(t, tasks[0].TaskKey, "key1") - assert.Equal(t, tasks[0].NewCluster.NodeTypeId, "i3.xlarge") - assert.Equal(t, tasks[0].NewCluster.NumWorkers, 1) - assert.Equal(t, tasks[0].SparkPythonTask.PythonFile, "./test1.py") - - assert.Equal(t, tasks[1].TaskKey, "key2") - assert.Equal(t, tasks[1].NewCluster.SparkVersion, "13.3.x-scala2.12") - assert.Equal(t, tasks[1].SparkPythonTask.PythonFile, "./test2.py") -} - -func TestOverrideTasksStaging(t *testing.T) { - b := loadTarget(t, "./override_job_tasks", "staging") - assert.Equal(t, "job", b.Config.Resources.Jobs["foo"].Name) - assert.Len(t, b.Config.Resources.Jobs["foo"].Tasks, 2) - - tasks := b.Config.Resources.Jobs["foo"].Tasks - assert.Equal(t, tasks[0].TaskKey, "key1") - assert.Equal(t, tasks[0].NewCluster.SparkVersion, "13.3.x-scala2.12") - assert.Equal(t, tasks[0].SparkPythonTask.PythonFile, "./test1.py") - - assert.Equal(t, tasks[1].TaskKey, "key2") - assert.Equal(t, tasks[1].NewCluster.NodeTypeId, "i3.2xlarge") - assert.Equal(t, tasks[1].NewCluster.NumWorkers, 4) - assert.Equal(t, tasks[1].SparkPythonTask.PythonFile, "./test3.py") -} diff --git a/bundle/tests/override_pipeline_cluster_test.go b/bundle/tests/override_pipeline_cluster_test.go deleted file mode 100644 index 
591fe423d..000000000 --- a/bundle/tests/override_pipeline_cluster_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestOverridePipelineClusterDev(t *testing.T) { - b := loadTarget(t, "./override_pipeline_cluster", "development") - assert.Equal(t, "job", b.Config.Resources.Pipelines["foo"].Name) - assert.Len(t, b.Config.Resources.Pipelines["foo"].Clusters, 1) - - c := b.Config.Resources.Pipelines["foo"].Clusters[0] - assert.Equal(t, map[string]string{"foo": "bar"}, c.SparkConf) - assert.Equal(t, "i3.xlarge", c.NodeTypeId) - assert.Equal(t, 1, c.NumWorkers) -} - -func TestOverridePipelineClusterStaging(t *testing.T) { - b := loadTarget(t, "./override_pipeline_cluster", "staging") - assert.Equal(t, "job", b.Config.Resources.Pipelines["foo"].Name) - assert.Len(t, b.Config.Resources.Pipelines["foo"].Clusters, 1) - - c := b.Config.Resources.Pipelines["foo"].Clusters[0] - assert.Equal(t, map[string]string{"foo": "bar"}, c.SparkConf) - assert.Equal(t, "i3.2xlarge", c.NodeTypeId) - assert.Equal(t, 4, c.NumWorkers) -} diff --git a/bundle/tests/path_translation_test.go b/bundle/tests/path_translation_test.go deleted file mode 100644 index 05702d2a2..000000000 --- a/bundle/tests/path_translation_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPathTranslationFallback(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - require.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 6) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationFallbackError(t *testing.T) { - b := loadTarget(t, 
"./path_translation/fallback", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} - -func TestPathTranslationNominal(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 8) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - assert.Equal(t, "for_each_notebook_example", filepath.ToSlash(j.Tasks[6].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[6].ForEachTask.Task.NotebookTask.NotebookPath)) - - assert.Equal(t, "for_each_spark_python_example", filepath.ToSlash(j.Tasks[7].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[7].ForEachTask.Task.SparkPythonTask.PythonFile)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationNominalError(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} diff --git a/bundle/tests/presets_test.go b/bundle/tests/presets_test.go index 5fcb5d95b..c2cbe497b 100644 --- a/bundle/tests/presets_test.go +++ b/bundle/tests/presets_test.go @@ -13,7 +13,7 @@ func TestPresetsDev(t *testing.T) { assert.Equal(t, "myprefix", b.Config.Presets.NamePrefix) assert.Equal(t, config.Paused, b.Config.Presets.TriggerPauseStatus) assert.Equal(t, 10, b.Config.Presets.JobsMaxConcurrentRuns) - assert.Equal(t, true, *b.Config.Presets.PipelinesDevelopment) + assert.True(t, *b.Config.Presets.PipelinesDevelopment) assert.Equal(t, "true", b.Config.Presets.Tags["dev"]) assert.Equal(t, "finance", b.Config.Presets.Tags["team"]) assert.Equal(t, "false", b.Config.Presets.Tags["prod"]) @@ -22,7 +22,7 @@ func TestPresetsDev(t *testing.T) { func 
TestPresetsProd(t *testing.T) { b := loadTarget(t, "./presets", "prod") - assert.Equal(t, false, *b.Config.Presets.PipelinesDevelopment) + assert.False(t, *b.Config.Presets.PipelinesDevelopment) assert.Equal(t, "finance", b.Config.Presets.Tags["team"]) assert.Equal(t, "true", b.Config.Presets.Tags["prod"]) } diff --git a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git 
a/bundle/tests/python_wheel_test.go b/bundle/tests/python_wheel_test.go index c982c09d6..06cb05270 100644 --- a/bundle/tests/python_wheel_test.go +++ b/bundle/tests/python_wheel_test.go @@ -23,7 +23,7 @@ func TestPythonWheelBuild(t *testing.T) { matches, err := filepath.Glob("./python_wheel/python_wheel/my_test_code/dist/my_test_code-*.whl") require.NoError(t, err) - require.Equal(t, 1, len(matches)) + require.Len(t, matches, 1) match := libraries.ExpandGlobReferences() diags = bundle.Apply(ctx, b, match) @@ -39,7 +39,7 @@ func TestPythonWheelBuildAutoDetect(t *testing.T) { matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact/dist/my_test_code-*.whl") require.NoError(t, err) - require.Equal(t, 1, len(matches)) + require.Len(t, matches, 1) match := libraries.ExpandGlobReferences() diags = bundle.Apply(ctx, b, match) @@ -55,7 +55,7 @@ func TestPythonWheelBuildAutoDetectWithNotebookTask(t *testing.T) { matches, err := filepath.Glob("./python_wheel/python_wheel_no_artifact_notebook/dist/my_test_code-*.whl") require.NoError(t, err) - require.Equal(t, 1, len(matches)) + require.Len(t, matches, 1) match := libraries.ExpandGlobReferences() diags = bundle.Apply(ctx, b, match) @@ -108,7 +108,7 @@ func TestPythonWheelBuildWithEnvironmentKey(t *testing.T) { matches, err := filepath.Glob("./python_wheel/environment_key/my_test_code/dist/my_test_code-*.whl") require.NoError(t, err) - require.Equal(t, 1, len(matches)) + require.Len(t, matches, 1) match := libraries.ExpandGlobReferences() diags = bundle.Apply(ctx, b, match) @@ -124,7 +124,7 @@ func TestPythonWheelBuildMultiple(t *testing.T) { matches, err := filepath.Glob("./python_wheel/python_wheel_multiple/my_test_code/dist/my_test_code*.whl") require.NoError(t, err) - require.Equal(t, 2, len(matches)) + require.Len(t, matches, 2) match := libraries.ExpandGlobReferences() diags = bundle.Apply(ctx, b, match) diff --git a/bundle/tests/quality_monitor_test.go b/bundle/tests/quality_monitor_test.go deleted file mode 100644 index 9b91052f5..000000000 --- a/bundle/tests/quality_monitor_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/catalog" - "github.com/stretchr/testify/assert" -) - -func assertExpectedMonitor(t *testing.T, p *resources.QualityMonitor) { - assert.Equal(t, "timestamp", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} - -func TestMonitorTableNames(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "development") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - assert.Equal(t, b.Config.Bundle.Mode, config.Development) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.dev", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.dev", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorStaging(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "staging") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.staging", p.TableName) - assert.Equal(t, 
"/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.staging", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorProduction(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "production") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.prod", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.prod", p.OutputSchemaName) - - inferenceLog := p.InferenceLog - assert.Equal(t, []string{"1 day", "1 hour"}, inferenceLog.Granularities) - assert.Equal(t, "timestamp_prod", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction_prod", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id_prod", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} diff --git a/bundle/tests/registered_model_test.go b/bundle/tests/registered_model_test.go index 008db8bdd..e9d572a3a 100644 --- a/bundle/tests/registered_model_test.go +++ b/bundle/tests/registered_model_test.go @@ -19,7 +19,7 @@ func assertExpectedModel(t *testing.T, p *resources.RegisteredModel) { func TestRegisteredModelDevelopment(t *testing.T) { b := loadTarget(t, "./registered_model", "development") assert.Len(t, b.Config.Resources.RegisteredModels, 1) - assert.Equal(t, b.Config.Bundle.Mode, config.Development) + assert.Equal(t, config.Development, b.Config.Bundle.Mode) p := b.Config.Resources.RegisteredModels["my_registered_model"] assert.Equal(t, "my-dev-model", p.Name) diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go deleted file mode 100644 index 0f553ac3d..000000000 --- a/bundle/tests/relative_path_translation_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestRelativePathTranslationDefault(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "default") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile) -} - -func TestRelativePathTranslationOverride(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "override") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile) -} diff --git a/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep b/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep b/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes_test.go b/bundle/tests/relative_path_with_includes_test.go index 6e13628be..7249cac1f 100644 --- a/bundle/tests/relative_path_with_includes_test.go +++ 
b/bundle/tests/relative_path_with_includes_test.go @@ -17,8 +17,8 @@ func TestRelativePathsWithIncludes(t *testing.T) { diags := bundle.Apply(context.Background(), b, m) assert.NoError(t, diags.Error()) - assert.Equal(t, "artifact_a", b.Config.Artifacts["test_a"].Path) - assert.Equal(t, filepath.Join("subfolder", "artifact_b"), b.Config.Artifacts["test_b"].Path) + assert.Equal(t, b.SyncRootPath+"/artifact_a", b.Config.Artifacts["test_a"].Path) + assert.Equal(t, b.SyncRootPath+"/subfolder/artifact_b", b.Config.Artifacts["test_b"].Path) assert.ElementsMatch( t, @@ -37,6 +37,6 @@ func TestRelativePathsWithIncludes(t *testing.T) { b.Config.Sync.Exclude, ) - assert.Equal(t, filepath.Join("dist", "job_a.whl"), b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) - assert.Equal(t, filepath.Join("subfolder", "dist", "job_b.whl"), b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "dist/job_a.whl", b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "subfolder/dist/job_b.whl", b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) } diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go index 113a6140b..03ff51ec5 100644 --- a/bundle/tests/run_as_test.go +++ b/bundle/tests/run_as_test.go @@ -2,7 +2,6 @@ package config_tests import ( "context" - "fmt" "testing" "github.com/databricks/cli/bundle" @@ -219,7 +218,7 @@ func TestRunAsErrorNeitherUserOrSpSpecified(t *testing.T) { for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { - bundlePath := fmt.Sprintf("./run_as/not_allowed/neither_sp_nor_user/%s", tc.name) + bundlePath := "./run_as/not_allowed/neither_sp_nor_user/" + tc.name b := load(t, bundlePath) ctx := context.Background() diff --git a/bundle/tests/sync_include_exclude_no_matches_test.go b/bundle/tests/sync_include_exclude_no_matches_test.go index 0192b61e6..c206e7471 100644 --- a/bundle/tests/sync_include_exclude_no_matches_test.go +++ b/bundle/tests/sync_include_exclude_no_matches_test.go @@ -20,26 +20,26 @@ func TestSyncIncludeExcludeNoMatchesTest(t *testing.T) { require.Len(t, diags, 3) require.NoError(t, diags.Error()) - require.Equal(t, diags[0].Severity, diag.Warning) - require.Equal(t, diags[0].Summary, "Pattern dist does not match any files") + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "Pattern dist does not match any files", diags[0].Summary) require.Len(t, diags[0].Paths, 1) - require.Equal(t, diags[0].Paths[0].String(), "sync.exclude[0]") + require.Equal(t, "sync.exclude[0]", diags[0].Paths[0].String()) assert.Len(t, diags[0].Locations, 1) require.Equal(t, diags[0].Locations[0].File, filepath.Join("sync", "override", "databricks.yml")) - require.Equal(t, diags[0].Locations[0].Line, 17) - require.Equal(t, diags[0].Locations[0].Column, 11) + require.Equal(t, 17, diags[0].Locations[0].Line) + require.Equal(t, 11, diags[0].Locations[0].Column) summaries := []string{ fmt.Sprintf("Pattern %s does not match any files", filepath.Join("src", "*")), fmt.Sprintf("Pattern %s does not match any files", filepath.Join("tests", "*")), } - require.Equal(t, diags[1].Severity, diag.Warning) + require.Equal(t, diag.Warning, diags[1].Severity) require.Contains(t, summaries, diags[1].Summary) - require.Equal(t, diags[2].Severity, diag.Warning) + require.Equal(t, diag.Warning, diags[2].Severity) require.Contains(t, summaries, diags[2].Summary) } @@ -47,7 +47,7 @@ func TestSyncIncludeWithNegate(t *testing.T) { b := loadTarget(t, "./sync/negate", "default") diags := 
bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), validate.ValidateSyncPatterns()) - require.Len(t, diags, 0) + require.Empty(t, diags) require.NoError(t, diags.Error()) } @@ -58,6 +58,6 @@ func TestSyncIncludeWithNegateNoMatches(t *testing.T) { require.Len(t, diags, 1) require.NoError(t, diags.Error()) - require.Equal(t, diags[0].Severity, diag.Warning) - require.Equal(t, diags[0].Summary, "Pattern !*.txt2 does not match any files") + require.Equal(t, diag.Warning, diags[0].Severity) + require.Equal(t, "Pattern !*.txt2 does not match any files", diags[0].Summary) } diff --git a/bundle/tests/sync_test.go b/bundle/tests/sync_test.go index 15644b67e..f5a0296a9 100644 --- a/bundle/tests/sync_test.go +++ b/bundle/tests/sync_test.go @@ -115,12 +115,12 @@ func TestSyncPathsNoRoot(t *testing.T) { // If set to nil, it won't sync anything. b = loadTarget(t, "./sync/paths_no_root", "nil") assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) - assert.Len(t, b.Config.Sync.Paths, 0) + assert.Empty(t, b.Config.Sync.Paths) // If set to an empty sequence, it won't sync anything. b = loadTarget(t, "./sync/paths_no_root", "empty") assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) - assert.Len(t, b.Config.Sync.Paths, 0) + assert.Empty(t, b.Config.Sync.Paths) } func TestSyncSharedCode(t *testing.T) { diff --git a/bundle/tests/undefined_resources_test.go b/bundle/tests/undefined_resources_test.go deleted file mode 100644 index 3dbacbc25..000000000 --- a/bundle/tests/undefined_resources_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/validate" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/stretchr/testify/assert" -) - -func TestUndefinedResourcesLoadWithError(t *testing.T) { - b := load(t, "./undefined_resources") - diags := bundle.Apply(context.Background(), b, validate.AllResourcesHaveValues()) - - assert.Len(t, diags, 3) - assert.Contains(t, diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: "job undefined-job is not defined", - Locations: []dyn.Location{{ - File: filepath.FromSlash("undefined_resources/databricks.yml"), - Line: 6, - Column: 19, - }}, - Paths: []dyn.Path{dyn.MustPathFromString("resources.jobs.undefined-job")}, - }) - assert.Contains(t, diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: "experiment undefined-experiment is not defined", - Locations: []dyn.Location{{ - File: filepath.FromSlash("undefined_resources/databricks.yml"), - Line: 11, - Column: 26, - }}, - Paths: []dyn.Path{dyn.MustPathFromString("resources.experiments.undefined-experiment")}, - }) - assert.Contains(t, diags, diag.Diagnostic{ - Severity: diag.Error, - Summary: "pipeline undefined-pipeline is not defined", - Locations: []dyn.Location{{ - File: filepath.FromSlash("undefined_resources/databricks.yml"), - Line: 14, - Column: 24, - }}, - Paths: []dyn.Path{dyn.MustPathFromString("resources.pipelines.undefined-pipeline")}, - }) -} diff --git a/bundle/tests/variables_test.go b/bundle/tests/variables_test.go deleted file mode 100644 index 37d488fad..000000000 --- a/bundle/tests/variables_test.go +++ /dev/null @@ -1,206 +0,0 @@ -package config_tests - -import ( - "context" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - 
"github.com/databricks/databricks-sdk-go/service/compute" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" -) - -func TestVariables(t *testing.T) { - t.Setenv("BUNDLE_VAR_b", "def") - b := load(t, "./variables/vanilla") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - assert.Equal(t, "abc def", b.Config.Bundle.Name) -} - -func TestVariablesLoadingFailsWhenRequiredVariableIsNotSpecified(t *testing.T) { - b := load(t, "./variables/vanilla") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - assert.ErrorContains(t, diags.Error(), "no value assigned to required variable b. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_b environment variable") -} - -func TestVariablesTargetsBlockOverride(t *testing.T) { - b := load(t, "./variables/env_overrides") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-with-single-variable-override"), - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - assert.Equal(t, "default-a dev-b", b.Config.Workspace.Profile) -} - -func TestVariablesTargetsBlockOverrideForMultipleVariables(t *testing.T) { - b := load(t, "./variables/env_overrides") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-with-two-variable-overrides"), - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - assert.Equal(t, "prod-a prod-b", b.Config.Workspace.Profile) -} - -func TestVariablesTargetsBlockOverrideWithProcessEnvVars(t *testing.T) { - t.Setenv("BUNDLE_VAR_b", "env-var-b") - b := load(t, "./variables/env_overrides") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-with-two-variable-overrides"), - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - require.NoError(t, diags.Error()) - assert.Equal(t, "prod-a env-var-b", b.Config.Workspace.Profile) -} - -func TestVariablesTargetsBlockOverrideWithMissingVariables(t *testing.T) { - b := load(t, "./variables/env_overrides") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-missing-a-required-variable-assignment"), - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - assert.ErrorContains(t, diags.Error(), "no value assigned to required variable b. 
Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_b environment variable") -} - -func TestVariablesTargetsBlockOverrideWithUndefinedVariables(t *testing.T) { - b := load(t, "./variables/env_overrides") - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-using-an-undefined-variable"), - mutator.SetVariables(), - mutator.ResolveVariableReferences( - "variables", - ), - )) - assert.ErrorContains(t, diags.Error(), "variable c is not defined but is assigned a value") -} - -func TestVariablesWithoutDefinition(t *testing.T) { - t.Setenv("BUNDLE_VAR_a", "foo") - t.Setenv("BUNDLE_VAR_b", "bar") - b := load(t, "./variables/without_definition") - diags := bundle.Apply(context.Background(), b, mutator.SetVariables()) - require.NoError(t, diags.Error()) - require.True(t, b.Config.Variables["a"].HasValue()) - require.True(t, b.Config.Variables["b"].HasValue()) - assert.Equal(t, "foo", b.Config.Variables["a"].Value) - assert.Equal(t, "bar", b.Config.Variables["b"].Value) -} - -func TestVariablesWithTargetLookupOverrides(t *testing.T) { - b := load(t, "./variables/env_overrides") - - mockWorkspaceClient := mocks.NewMockWorkspaceClient(t) - b.SetWorkpaceClient(mockWorkspaceClient.WorkspaceClient) - instancePoolApi := mockWorkspaceClient.GetMockInstancePoolsAPI() - instancePoolApi.EXPECT().GetByInstancePoolName(mock.Anything, "some-test-instance-pool").Return(&compute.InstancePoolAndStats{ - InstancePoolId: "1234", - }, nil) - - clustersApi := mockWorkspaceClient.GetMockClustersAPI() - clustersApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ - FilterBy: &compute.ListClustersFilterBy{ - ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, - }, - }).Return([]compute.ClusterDetails{ - {ClusterId: "4321", ClusterName: "some-test-cluster"}, - {ClusterId: "9876", ClusterName: "some-other-cluster"}, - }, nil) - - clusterPoliciesApi := mockWorkspaceClient.GetMockClusterPoliciesAPI() - clusterPoliciesApi.EXPECT().GetByName(mock.Anything, "some-test-cluster-policy").Return(&compute.Policy{ - PolicyId: "9876", - }, nil) - - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SelectTarget("env-overrides-lookup"), - mutator.SetVariables(), - mutator.ResolveResourceReferences(), - )) - - require.NoError(t, diags.Error()) - assert.Equal(t, "4321", b.Config.Variables["d"].Value) - assert.Equal(t, "1234", b.Config.Variables["e"].Value) - assert.Equal(t, "9876", b.Config.Variables["f"].Value) -} - -func TestVariableTargetOverrides(t *testing.T) { - tcases := []struct { - targetName string - pipelineName string - pipelineContinuous bool - pipelineNumWorkers int - }{ - { - "use-default-variable-values", - "a_string", - true, - 42, - }, - { - "override-string-variable", - "overridden_string", - true, - 42, - }, - { - "override-int-variable", - "a_string", - true, - 43, - }, - { - "override-both-bool-and-string-variables", - "overridden_string", - false, - 42, - }, - } - - for _, tcase := range tcases { - t.Run(tcase.targetName, func(t *testing.T) { - b := loadTarget(t, "./variables/variable_overrides_in_target", tcase.targetName) - diags := bundle.Apply(context.Background(), b, bundle.Seq( - mutator.SetVariables(), - mutator.ResolveVariableReferences("variables")), - ) - require.NoError(t, diags.Error()) - - assert.Equal(t, tcase.pipelineName, b.Config.Resources.Pipelines["my_pipeline"].Name) - assert.Equal(t, tcase.pipelineContinuous, b.Config.Resources.Pipelines["my_pipeline"].Continuous) - 
assert.Equal(t, tcase.pipelineNumWorkers, b.Config.Resources.Pipelines["my_pipeline"].Clusters[0].NumWorkers) - }) - } -} - -func TestBundleWithEmptyVariableLoads(t *testing.T) { - b := load(t, "./variables/empty") - diags := bundle.Apply(context.Background(), b, mutator.SetVariables()) - require.ErrorContains(t, diags.Error(), "no value assigned to required variable a") -} diff --git a/bundle/trampoline/python_wheel.go b/bundle/trampoline/python_wheel.go index 8e309a625..075804479 100644 --- a/bundle/trampoline/python_wheel.go +++ b/bundle/trampoline/python_wheel.go @@ -2,6 +2,7 @@ package trampoline import ( "context" + "errors" "fmt" "strconv" "strings" @@ -147,7 +148,7 @@ func (t *pythonTrampoline) GetTemplateData(task *jobs.Task) (map[string]any, err func (t *pythonTrampoline) generateParameters(task *jobs.PythonWheelTask) (string, error) { if task.Parameters != nil && task.NamedParameters != nil { - return "", fmt.Errorf("not allowed to pass both paramaters and named_parameters") + return "", errors.New("not allowed to pass both parameters and named_parameters") } params := append([]string{task.PackageName}, task.Parameters...) for k, v := range task.NamedParameters { diff --git a/bundle/trampoline/trampoline_test.go b/bundle/trampoline/trampoline_test.go index 3c5d18570..6e6b8db48 100644 --- a/bundle/trampoline/trampoline_test.go +++ b/bundle/trampoline/trampoline_test.go @@ -2,7 +2,7 @@ package trampoline import ( "context" - "fmt" + "errors" "os" "path/filepath" "testing" @@ -30,7 +30,7 @@ func (f *functions) GetTasks(b *bundle.Bundle) []TaskWithJobKey { func (f *functions) GetTemplateData(task *jobs.Task) (map[string]any, error) { if task.PythonWheelTask == nil { - return nil, fmt.Errorf("PythonWheelTask cannot be nil") + return nil, errors.New("PythonWheelTask cannot be nil") } data := make(map[string]any) diff --git a/cmd/account/federation-policy/federation-policy.go b/cmd/account/federation-policy/federation-policy.go index d78ac709a..e47bf8324 100755 --- a/cmd/account/federation-policy/federation-policy.go +++ b/cmd/account/federation-policy/federation-policy.go @@ -110,8 +110,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create" @@ -180,7 +181,10 @@ func newDelete() *cobra.Command { cmd.Use = "delete POLICY_ID" cmd.Short = `Delete account federation policy.` - cmd.Long = `Delete account federation policy.` + cmd.Long = `Delete account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -233,7 +237,10 @@ func newGet() *cobra.Command { cmd.Use = "get POLICY_ID" cmd.Short = `Get account federation policy.` - cmd.Long = `Get account federation policy.` + cmd.Long = `Get account federation policy. 
+ + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -338,25 +345,22 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update POLICY_ID UPDATE_MASK" + cmd.Use = "update POLICY_ID" cmd.Short = `Update account federation policy.` cmd.Long = `Update account federation policy. Arguments: - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(2) + check := root.ExactArgs(1) return check(cmd, args) } @@ -378,7 +382,6 @@ func newUpdate() *cobra.Command { } } updateReq.PolicyId = args[0] - updateReq.UpdateMask = args[1] response, err := a.FederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go index 77f73bcd0..df36de239 100755 --- a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go +++ b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go @@ -117,8 +117,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create SERVICE_PRINCIPAL_ID" @@ -198,7 +199,7 @@ func newDelete() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -259,7 +260,7 @@ func newGet() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. 
- POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -376,26 +377,23 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID UPDATE_MASK" + cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID" cmd.Short = `Update service principal federation policy.` cmd.Long = `Update service principal federation policy. Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(3) + check := root.ExactArgs(2) return check(cmd, args) } @@ -421,7 +419,6 @@ func newUpdate() *cobra.Command { return fmt.Errorf("invalid SERVICE_PRINCIPAL_ID: %s", args[0]) } updateReq.PolicyId = args[1] - updateReq.UpdateMask = args[2] response, err := a.ServicePrincipalFederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/auth/auth.go b/cmd/auth/auth.go index ceceae25c..4261e93e7 100644 --- a/cmd/auth/auth.go +++ b/cmd/auth/auth.go @@ -2,7 +2,7 @@ package auth import ( "context" - "fmt" + "errors" "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/cmdio" @@ -36,7 +36,7 @@ GCP: https://docs.gcp.databricks.com/dev-tools/auth/index.html`, func promptForHost(ctx context.Context) (string, error) { if !cmdio.IsInTTY(ctx) { - return "", fmt.Errorf("the command is being run in a non-interactive environment, please specify a host using --host") + return "", errors.New("the command is being run in a non-interactive environment, please specify a host using --host") } prompt := cmdio.Prompt(ctx) @@ -46,7 +46,7 @@ func promptForHost(ctx context.Context) (string, error) { func promptForAccountID(ctx context.Context) (string, error) { if !cmdio.IsInTTY(ctx) { - return "", fmt.Errorf("the command is being run in a non-interactive environment, please specify an account ID using --account-id") + return "", errors.New("the command is being run in a non-interactive environment, please specify an account ID using --account-id") } prompt := cmdio.Prompt(ctx) diff --git a/cmd/auth/describe_test.go b/cmd/auth/describe_test.go index 7f5f900d4..35e0c6e64 100644 --- a/cmd/auth/describe_test.go +++ b/cmd/auth/describe_test.go @@ -2,7 +2,7 @@ package auth import ( "context" - "fmt" + "errors" "testing" "github.com/databricks/cli/cmd/root" @@ -102,7 +102,7 @@ func TestGetWorkspaceAuthStatusError(t *testing.T) { "token": "test-token", "auth_type": 
"azure-cli", }) - return cfg, false, fmt.Errorf("auth error") + return cfg, false, errors.New("auth error") }) require.NoError(t, err) require.NotNil(t, status) @@ -151,7 +151,7 @@ func TestGetWorkspaceAuthStatusSensitive(t *testing.T) { "token": "test-token", "auth_type": "azure-cli", }) - return cfg, false, fmt.Errorf("auth error") + return cfg, false, errors.New("auth error") }) require.NoError(t, err) require.NotNil(t, status) diff --git a/cmd/auth/env.go b/cmd/auth/env.go index 52b7cbbfd..11149af8c 100644 --- a/cmd/auth/env.go +++ b/cmd/auth/env.go @@ -23,9 +23,9 @@ func canonicalHost(host string) (string, error) { } // If the host is empty, assume the scheme wasn't included. if parsedHost.Host == "" { - return fmt.Sprintf("https://%s", host), nil + return "https://" + host, nil } - return fmt.Sprintf("https://%s", parsedHost.Host), nil + return "https://" + parsedHost.Host, nil } var ErrNoMatchingProfiles = errors.New("no matching profiles found") diff --git a/cmd/auth/login.go b/cmd/auth/login.go index c98676599..a6d0bf4cc 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -176,7 +176,7 @@ depends on the existing profiles you have set in your configuration file func setHostAndAccountId(ctx context.Context, profileName string, persistentAuth *auth.PersistentAuth, args []string) error { // If both [HOST] and --host are provided, return an error. if len(args) > 0 && persistentAuth.Host != "" { - return fmt.Errorf("please only provide a host as an argument or a flag, not both") + return errors.New("please only provide a host as an argument or a flag, not both") } profiler := profile.GetProfiler(ctx) diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index a25e02f6c..560b07e39 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/bundle/render" "github.com/databricks/cli/cmd/bundle/utils" @@ -71,6 +72,7 @@ func newDeployCommand() *cobra.Command { diags = diags.Extend( bundle.Apply(ctx, b, bundle.Seq( phases.Initialize(), + validate.FastValidate(), phases.Build(), phases.Deploy(outputHandler), )), diff --git a/cmd/bundle/destroy.go b/cmd/bundle/destroy.go index 711abbcd7..0b2f14875 100644 --- a/cmd/bundle/destroy.go +++ b/cmd/bundle/destroy.go @@ -2,7 +2,7 @@ package bundle import ( "context" - "fmt" + "errors" "os" "github.com/databricks/cli/bundle" @@ -49,16 +49,16 @@ func newDestroyCommand() *cobra.Command { // we require auto-approve for non tty terminals since interactive consent // is not possible if !term.IsTerminal(int(os.Stderr.Fd())) && !autoApprove { - return fmt.Errorf("please specify --auto-approve to skip interactive confirmation checks for non tty consoles") + return errors.New("please specify --auto-approve to skip interactive confirmation checks for non tty consoles") } // Check auto-approve is selected for json logging logger, ok := cmdio.FromContext(ctx) if !ok { - return fmt.Errorf("progress logger not found") + return errors.New("progress logger not found") } if logger.Mode == flags.ModeJson && !autoApprove { - return fmt.Errorf("please specify --auto-approve since selected logging format is json") + return errors.New("please specify --auto-approve since selected logging format is json") } diags = bundle.Apply(ctx, b, bundle.Seq( diff --git a/cmd/bundle/generate.go b/cmd/bundle/generate.go index 7dea19ff9..d09c6feb4 100644 --- a/cmd/bundle/generate.go +++ 
b/cmd/bundle/generate.go @@ -17,6 +17,7 @@ func newGenerateCommand() *cobra.Command { cmd.AddCommand(generate.NewGenerateJobCommand()) cmd.AddCommand(generate.NewGeneratePipelineCommand()) cmd.AddCommand(generate.NewGenerateDashboardCommand()) + cmd.AddCommand(generate.NewGenerateAppCommand()) cmd.PersistentFlags().StringVar(&key, "key", "", `resource key to use for the generated configuration`) return cmd } diff --git a/cmd/bundle/generate/app.go b/cmd/bundle/generate/app.go new file mode 100644 index 000000000..9dbd4fe46 --- /dev/null +++ b/cmd/bundle/generate/app.go @@ -0,0 +1,166 @@ +package generate + +import ( + "context" + "errors" + "fmt" + "io" + "io/fs" + "path/filepath" + + "github.com/databricks/cli/bundle/config/generate" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/yamlsaver" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/textutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" + + "gopkg.in/yaml.v3" +) + +func NewGenerateAppCommand() *cobra.Command { + var configDir string + var sourceDir string + var appName string + var force bool + + cmd := &cobra.Command{ + Use: "app", + Short: "Generate bundle configuration for a Databricks app", + } + + cmd.Flags().StringVar(&appName, "existing-app-name", "", `App name to generate config for`) + cmd.MarkFlagRequired("existing-app-name") + + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Directory path where the output bundle config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src/app", `Directory path where the app files will be stored`) + cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + b, diags := root.MustConfigureBundle(cmd) + if err := diags.Error(); err != nil { + return diags.Error() + } + + w := b.WorkspaceClient() + cmdio.LogString(ctx, fmt.Sprintf("Loading app '%s' configuration", appName)) + app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: appName}) + if err != nil { + return err + } + + // Making sure the config directory and source directory are absolute paths. + if !filepath.IsAbs(configDir) { + configDir = filepath.Join(b.BundleRootPath, configDir) + } + + if !filepath.IsAbs(sourceDir) { + sourceDir = filepath.Join(b.BundleRootPath, sourceDir) + } + + downloader := newDownloader(w, sourceDir, configDir) + + sourceCodePath := app.DefaultSourceCodePath + err = downloader.markDirectoryForDownload(ctx, &sourceCodePath) + if err != nil { + return err + } + + appConfig, err := getAppConfig(ctx, app, w) + if err != nil { + return fmt.Errorf("failed to get app config: %w", err) + } + + // Making sure the source code path is relative to the config directory. 
+ rel, err := filepath.Rel(configDir, sourceDir) + if err != nil { + return err + } + + v, err := generate.ConvertAppToValue(app, filepath.ToSlash(rel), appConfig) + if err != nil { + return err + } + + appKey := cmd.Flag("key").Value.String() + if appKey == "" { + appKey = textutil.NormalizeString(app.Name) + } + + result := map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "apps": dyn.V(map[string]dyn.Value{ + appKey: v, + }), + }), + } + + // If there are app.yaml or app.yml files in the source code path, they will be downloaded but we don't want to include them in the bundle. + // We include this configuration inline, so we need to remove these files. + for _, configFile := range []string{"app.yml", "app.yaml"} { + delete(downloader.files, filepath.Join(sourceDir, configFile)) + } + + err = downloader.FlushToDisk(ctx, force) + if err != nil { + return err + } + + filename := filepath.Join(configDir, appKey+".app.yml") + + saver := yamlsaver.NewSaver() + err = saver.SaveAsYAML(result, filename, force) + if err != nil { + return err + } + + cmdio.LogString(ctx, "App configuration successfully saved to "+filename) + return nil + } + + return cmd +} + +func getAppConfig(ctx context.Context, app *apps.App, w *databricks.WorkspaceClient) (map[string]any, error) { + sourceCodePath := app.DefaultSourceCodePath + + f, err := filer.NewWorkspaceFilesClient(w, sourceCodePath) + if err != nil { + return nil, err + } + + // The app config is stored in app.yml or app.yaml file in the source code path. + configFileNames := []string{"app.yml", "app.yaml"} + for _, configFile := range configFileNames { + r, err := f.Read(ctx, configFile) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + continue + } + return nil, err + } + defer r.Close() + + cmdio.LogString(ctx, "Reading app configuration from "+configFile) + content, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + var appConfig map[string]any + err = yaml.Unmarshal(content, &appConfig) + if err != nil { + cmdio.LogString(ctx, fmt.Sprintf("Failed to parse app configuration:\n%s\nerr: %v", string(content), err)) + return nil, nil + } + + return appConfig, nil + } + + return nil, nil +} diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index f196bbe62..d56d246c2 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -96,7 +96,7 @@ func (d *dashboard) resolveFromPath(ctx context.Context, b *bundle.Bundle) (stri return "", diag.Diagnostics{ { Severity: diag.Error, - Summary: fmt.Sprintf("expected a dashboard, found a %s", found), + Summary: "expected a dashboard, found a " + found, }, } } @@ -188,7 +188,7 @@ func (d *dashboard) saveSerializedDashboard(_ context.Context, b *bundle.Bundle, func (d *dashboard) saveConfiguration(ctx context.Context, b *bundle.Bundle, dashboard *dashboards.Dashboard, key string) error { // Save serialized dashboard definition to the dashboard directory. - dashboardBasename := fmt.Sprintf("%s.lvdash.json", key) + dashboardBasename := key + ".lvdash.json" dashboardPath := filepath.Join(d.dashboardDir, dashboardBasename) err := d.saveSerializedDashboard(ctx, b, dashboard, dashboardPath) if err != nil { @@ -215,7 +215,7 @@ func (d *dashboard) saveConfiguration(ctx context.Context, b *bundle.Bundle, das } // Save the configuration to the resource directory. 
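The getAppConfig function added above probes app.yml, then app.yaml, skips a missing file, and decodes the first match into a free-form map; a parse failure is logged and treated as no config. A self-contained sketch of that lookup order, with an fs.FS standing in for the workspace filer used in the real command:

package main

import (
	"errors"
	"fmt"
	"io/fs"
	"testing/fstest"

	"gopkg.in/yaml.v3"
)

// readAppConfig mirrors the lookup order in getAppConfig above; fsys stands in
// for the filer rooted at the app's source code path.
func readAppConfig(fsys fs.FS) (map[string]any, error) {
	for _, name := range []string{"app.yml", "app.yaml"} {
		content, err := fs.ReadFile(fsys, name)
		if errors.Is(err, fs.ErrNotExist) {
			continue // try the next candidate file
		}
		if err != nil {
			return nil, err
		}
		var cfg map[string]any
		if err := yaml.Unmarshal(content, &cfg); err != nil {
			return nil, nil // the real command logs and ignores unparsable config
		}
		return cfg, nil
	}
	return nil, nil // no app.yml / app.yaml present
}

func main() {
	fsys := fstest.MapFS{"app.yaml": {Data: []byte("command: [python, app.py]")}}
	cfg, _ := readAppConfig(fsys)
	fmt.Println(cfg) // map[command:[python app.py]]
}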
- resourcePath := filepath.Join(d.resourceDir, fmt.Sprintf("%s.dashboard.yml", key)) + resourcePath := filepath.Join(d.resourceDir, key+".dashboard.yml") saver := yamlsaver.NewSaverWithStyle(map[string]yaml.Style{ "display_name": yaml.DoubleQuotedStyle, }) @@ -441,8 +441,8 @@ func NewGenerateDashboardCommand() *cobra.Command { cmd.Flags().MarkHidden("existing-dashboard-id") // Output flags. - cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "./resources", `directory to write the configuration to`) - cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "./src", `directory to write the dashboard representation to`) + cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "resources", `directory to write the configuration to`) + cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "src", `directory to write the dashboard representation to`) cmd.Flags().BoolVarP(&d.force, "force", "f", false, `force overwrite existing files in the output directory`) // Exactly one of the lookup flags must be provided. diff --git a/cmd/bundle/generate/dashboard_test.go b/cmd/bundle/generate/dashboard_test.go index f1161950b..33a463ea0 100644 --- a/cmd/bundle/generate/dashboard_test.go +++ b/cmd/bundle/generate/dashboard_test.go @@ -44,7 +44,7 @@ func TestDashboard_ErrorOnLegacyDashboard(t *testing.T) { _, diags := d.resolveID(ctx, b) require.Len(t, diags, 1) - assert.Equal(t, diags[0].Summary, "dashboard \"legacy dashboard\" is a legacy dashboard") + assert.Equal(t, "dashboard \"legacy dashboard\" is a legacy dashboard", diags[0].Summary) } func TestDashboard_ExistingID_Nominal(t *testing.T) { diff --git a/cmd/bundle/generate/generate_test.go b/cmd/bundle/generate/generate_test.go index 896b7de51..395d4ebd4 100644 --- a/cmd/bundle/generate/generate_test.go +++ b/cmd/bundle/generate/generate_test.go @@ -3,7 +3,6 @@ package generate import ( "bytes" "context" - "errors" "fmt" "io" "io/fs" @@ -302,7 +301,7 @@ func TestGenerateJobCommandOldFileRename(t *testing.T) { // Make sure file do not exists after the run _, err = os.Stat(oldFilename) - require.True(t, errors.Is(err, fs.ErrNotExist)) + require.ErrorIs(t, err, fs.ErrNotExist) data, err := os.ReadFile(filepath.Join(configDir, "test_job.job.yml")) require.NoError(t, err) diff --git a/cmd/bundle/generate/job.go b/cmd/bundle/generate/job.go index 9ac41e3cb..d97891cd5 100644 --- a/cmd/bundle/generate/job.go +++ b/cmd/bundle/generate/job.go @@ -32,13 +32,8 @@ func NewGenerateJobCommand() *cobra.Command { cmd.Flags().Int64Var(&jobId, "existing-job-id", 0, `Job ID of the job to generate config for`) cmd.MarkFlagRequired("existing-job-id") - wd, err := os.Getwd() - if err != nil { - wd = "." 
- } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -85,8 +80,8 @@ func NewGenerateJobCommand() *cobra.Command { return err } - oldFilename := filepath.Join(configDir, fmt.Sprintf("%s.yml", jobKey)) - filename := filepath.Join(configDir, fmt.Sprintf("%s.job.yml", jobKey)) + oldFilename := filepath.Join(configDir, jobKey+".yml") + filename := filepath.Join(configDir, jobKey+".job.yml") // User might continuously run generate command to update their bundle jobs with any changes made in Databricks UI. // Due to changing in the generated file names, we need to first rename existing resource file to the new name. @@ -107,7 +102,7 @@ func NewGenerateJobCommand() *cobra.Command { return err } - cmdio.LogString(ctx, fmt.Sprintf("Job configuration successfully saved to %s", filename)) + cmdio.LogString(ctx, "Job configuration successfully saved to "+filename) return nil } diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 910baa45f..1d2c345d6 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -32,13 +32,8 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd.Flags().StringVar(&pipelineId, "existing-pipeline-id", "", `ID of the pipeline to generate config for`) cmd.MarkFlagRequired("existing-pipeline-id") - wd, err := os.Getwd() - if err != nil { - wd = "." - } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { @@ -85,8 +80,8 @@ func NewGeneratePipelineCommand() *cobra.Command { return err } - oldFilename := filepath.Join(configDir, fmt.Sprintf("%s.yml", pipelineKey)) - filename := filepath.Join(configDir, fmt.Sprintf("%s.pipeline.yml", pipelineKey)) + oldFilename := filepath.Join(configDir, pipelineKey+".yml") + filename := filepath.Join(configDir, pipelineKey+".pipeline.yml") // User might continuously run generate command to update their bundle jobs with any changes made in Databricks UI. // Due to changing in the generated file names, we need to first rename existing resource file to the new name. 
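The comment above refers to renaming the old <key>.yml file to the new <key>.job.yml / <key>.pipeline.yml name, but that code is elided from the hunk. A hedged sketch of what such a migration step could look like, purely to illustrate the idea:

package main

import (
	"errors"
	"fmt"
	"io/fs"
	"os"
)

// renameLegacyConfig is an illustration only: if a config file generated by an
// older CLI version exists, move it to the new name before saving, so repeated
// generate runs keep updating a single file instead of creating duplicates.
func renameLegacyConfig(oldFilename, filename string) error {
	if _, err := os.Stat(oldFilename); errors.Is(err, fs.ErrNotExist) {
		return nil // nothing to migrate
	} else if err != nil {
		return err
	}
	fmt.Printf("renaming %s -> %s\n", oldFilename, filename)
	return os.Rename(oldFilename, filename)
}

func main() {
	_ = renameLegacyConfig("resources/test_job.yml", "resources/test_job.job.yml")
}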
@@ -109,7 +104,7 @@ func NewGeneratePipelineCommand() *cobra.Command { return err } - cmdio.LogString(ctx, fmt.Sprintf("Pipeline configuration successfully saved to %s", filename)) + cmdio.LogString(ctx, "Pipeline configuration successfully saved to "+filename) return nil } diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index 8e3764e35..c2c9bbb55 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/databricks/databricks-sdk-go/service/workspace" "golang.org/x/sync/errgroup" ) @@ -63,6 +64,37 @@ func (n *downloader) markFileForDownload(ctx context.Context, filePath *string) return nil } +func (n *downloader) markDirectoryForDownload(ctx context.Context, dirPath *string) error { + _, err := n.w.Workspace.GetStatusByPath(ctx, *dirPath) + if err != nil { + return err + } + + objects, err := n.w.Workspace.RecursiveList(ctx, *dirPath) + if err != nil { + return err + } + + for _, obj := range objects { + if obj.ObjectType == workspace.ObjectTypeDirectory { + continue + } + + err := n.markFileForDownload(ctx, &obj.Path) + if err != nil { + return err + } + } + + rel, err := filepath.Rel(n.configDir, n.sourceDir) + if err != nil { + return err + } + + *dirPath = rel + return nil +} + func (n *downloader) markNotebookForDownload(ctx context.Context, notebookPath *string) error { info, err := n.w.Workspace.GetStatusByPath(ctx, *notebookPath) if err != nil { @@ -106,9 +138,7 @@ func (n *downloader) FlushToDisk(ctx context.Context, force bool) error { } errs, errCtx := errgroup.WithContext(ctx) - for k, v := range n.files { - targetPath := k - filePath := v + for targetPath, filePath := range n.files { errs.Go(func() error { reader, err := n.w.Workspace.Download(errCtx, filePath) if err != nil { @@ -126,7 +156,7 @@ func (n *downloader) FlushToDisk(ctx context.Context, force bool) error { return err } - cmdio.LogString(errCtx, fmt.Sprintf("File successfully saved to %s", targetPath)) + cmdio.LogString(errCtx, "File successfully saved to "+targetPath) return reader.Close() }) } diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..1911abe19 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -1,175 +1,15 @@ package bundle import ( - "context" "errors" "fmt" - "io/fs" - "os" - "path/filepath" - "slices" - "strings" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/dbr" - "github.com/databricks/cli/libs/filer" - "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) -var gitUrlPrefixes = []string{ - "https://", - "git@", -} - -type nativeTemplate struct { - name string - gitUrl string - description string - aliases []string - hidden bool -} - -const customTemplate = "custom..." 
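The FlushToDisk hunk above drops the targetPath := k; filePath := v copies and ranges over the map directly inside the errgroup loop; with the per-iteration loop variables of Go 1.22+ (this module targets go 1.23), each goroutine captures its own pair. A self-contained sketch of the pattern; the real code uses errgroup.WithContext and writes the downloaded content to disk:

package main

import (
	"fmt"

	"golang.org/x/sync/errgroup"
)

// flushAll fans the map entries out to goroutines; targetPath and filePath can
// be captured directly because each loop iteration gets fresh variables.
func flushAll(files map[string]string) error {
	var errs errgroup.Group
	for targetPath, filePath := range files {
		errs.Go(func() error {
			fmt.Printf("downloading %s -> %s\n", filePath, targetPath)
			return nil // the real code writes the downloaded reader to targetPath
		})
	}
	return errs.Wait()
}

func main() {
	_ = flushAll(map[string]string{
		"src/app/app.py": "/Workspace/Users/someone@example.com/app/app.py",
	})
}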
- -var nativeTemplates = []nativeTemplate{ - { - name: "default-python", - description: "The default Python template for Notebooks / Delta Live Tables / Workflows", - }, - { - name: "default-sql", - description: "The default SQL template for .sql files that run with Databricks SQL", - }, - { - name: "dbt-sql", - description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - }, - { - name: "mlops-stacks", - gitUrl: "https://github.com/databricks/mlops-stacks", - description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", - aliases: []string{"mlops-stack"}, - }, - { - name: "default-pydabs", - gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", - hidden: true, - description: "The default PyDABs template", - }, - { - name: customTemplate, - description: "Bring your own template", - }, -} - -// Return template descriptions for command-line help -func nativeTemplateHelpDescriptions() string { - var lines []string - for _, template := range nativeTemplates { - if template.name != customTemplate && !template.hidden { - lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) - } - } - return strings.Join(lines, "\n") -} - -// Return template options for an interactive prompt -func nativeTemplateOptions() []cmdio.Tuple { - names := make([]cmdio.Tuple, 0, len(nativeTemplates)) - for _, template := range nativeTemplates { - if template.hidden { - continue - } - tuple := cmdio.Tuple{ - Name: template.name, - Id: template.description, - } - names = append(names, tuple) - } - return names -} - -func getNativeTemplateByDescription(description string) string { - for _, template := range nativeTemplates { - if template.description == description { - return template.name - } - } - return "" -} - -func getUrlForNativeTemplate(name string) string { - for _, template := range nativeTemplates { - if template.name == name { - return template.gitUrl - } - if slices.Contains(template.aliases, name) { - return template.gitUrl - } - } - return "" -} - -func getFsForNativeTemplate(name string) (fs.FS, error) { - builtin, err := template.Builtin() - if err != nil { - return nil, err - } - - // If this is a built-in template, the return value will be non-nil. - var templateFS fs.FS - for _, entry := range builtin { - if entry.Name == name { - templateFS = entry.FS - break - } - } - - return templateFS, nil -} - -func isRepoUrl(url string) bool { - result := false - for _, prefix := range gitUrlPrefixes { - if strings.HasPrefix(url, prefix) { - result = true - break - } - } - return result -} - -// Computes the repo name from the repo URL. Treats the last non empty word -// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git -// the name would be "cli.git" -func repoName(url string) string { - parts := strings.Split(strings.TrimRight(url, "/"), "/") - return parts[len(parts)-1] -} - -func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { - outputDir, err := filepath.Abs(outputDir) - if err != nil { - return nil, err - } - - // If the CLI is running on DBR and we're writing to the workspace file system, - // use the extension-aware workspace filesystem filer to instantiate the template. - // - // It is not possible to write notebooks through the workspace filesystem's FUSE mount. 
- // Therefore this is the only way we can initialize templates that contain notebooks - // when running the CLI on DBR and initializing a template to the workspace. - // - if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { - return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) - } - - return filer.NewLocalClient(outputDir) -} - func newInitCommand() *cobra.Command { cmd := &cobra.Command{ Use: "init [TEMPLATE_PATH]", @@ -182,7 +22,7 @@ TEMPLATE_PATH optionally specifies which template to use. It can be one of the f - a local file system path with a template directory - a Git repository URL, e.g. https://github.com/my/repository -See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, nativeTemplateHelpDescriptions()), +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()), } var configFile string @@ -202,88 +42,32 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return errors.New("only one of --tag or --branch can be specified") } - // Git ref to use for template initialization - ref := branch - if tag != "" { - ref = tag + var templatePathOrUrl string + if len(args) > 0 { + templatePathOrUrl = args[0] + } + r := template.Resolver{ + TemplatePathOrUrl: templatePathOrUrl, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: templateDir, + Tag: tag, + Branch: branch, } ctx := cmd.Context() - var templatePath string - if len(args) > 0 { - templatePath = args[0] - } else { - var err error - if !cmdio.IsPromptSupported(ctx) { - return errors.New("please specify a template") - } - description, err := cmdio.SelectOrdered(ctx, nativeTemplateOptions(), "Template to use") - if err != nil { - return err - } - templatePath = getNativeTemplateByDescription(description) - } - - outputFiler, err := constructOutputFiler(ctx, outputDir) - if err != nil { - return err - } - - if templatePath == customTemplate { + tmpl, err := r.Resolve(ctx) + if errors.Is(err, template.ErrCustomSelected) { cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.") cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.") return nil } - - // Expand templatePath to a git URL if it's an alias for a known native template - // and we know it's git URL. - if gitUrl := getUrlForNativeTemplate(templatePath); gitUrl != "" { - templatePath = gitUrl - } - - if !isRepoUrl(templatePath) { - if templateDir != "" { - return errors.New("--template-dir can only be used with a Git repository URL") - } - - templateFS, err := getFsForNativeTemplate(templatePath) - if err != nil { - return err - } - - // If this is not a built-in template, then it must be a local file system path. - if templateFS == nil { - templateFS = os.DirFS(templatePath) - } - - // skip downloading the repo because input arg is not a URL. We assume - // it's a path on the local file system in that case - return template.Materialize(ctx, configFile, templateFS, outputFiler) - } - - // Create a temporary directory with the name of the repository. The '*' - // character is replaced by a random string in the generated temporary directory. 
- repoDir, err := os.MkdirTemp("", repoName(templatePath)+"-*") if err != nil { return err } + defer tmpl.Reader.Cleanup(ctx) - // start the spinner - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "Downloading the template\n" - - // TODO: Add automated test that the downloaded git repo is cleaned up. - // Clone the repository in the temporary directory - err = git.Clone(ctx, templatePath, ref, repoDir) - close(promptSpinner) - if err != nil { - return err - } - - // Clean up downloaded repository once the template is materialized. - defer os.RemoveAll(repoDir) - templateFS := os.DirFS(filepath.Join(repoDir, templateDir)) - return template.Materialize(ctx, configFile, templateFS, outputFiler) + return tmpl.Writer.Materialize(ctx, tmpl.Reader) } return cmd } diff --git a/cmd/bundle/launch.go b/cmd/bundle/launch.go index 0d2b4233b..3fea839c9 100644 --- a/cmd/bundle/launch.go +++ b/cmd/bundle/launch.go @@ -1,7 +1,7 @@ package bundle import ( - "fmt" + "errors" "github.com/databricks/cli/cmd/root" "github.com/spf13/cobra" @@ -19,7 +19,7 @@ func newLaunchCommand() *cobra.Command { } cmd.RunE = func(cmd *cobra.Command, args []string) error { - return fmt.Errorf("TODO") + return errors.New("TODO") // contents, err := os.ReadFile(args[0]) // if err != nil { // return err diff --git a/cmd/bundle/open.go b/cmd/bundle/open.go index a2ad32fd8..5a26e1ea7 100644 --- a/cmd/bundle/open.go +++ b/cmd/bundle/open.go @@ -44,7 +44,7 @@ func resolveOpenArgument(ctx context.Context, b *bundle.Bundle, args []string) ( } if len(args) < 1 { - return "", fmt.Errorf("expected a KEY of the resource to open") + return "", errors.New("expected a KEY of the resource to open") } return args[0], nil @@ -113,7 +113,7 @@ func newOpenCommand() *cobra.Command { // Confirm that the resource has a URL. 
url := ref.Resource.GetURL() if url == "" { - return fmt.Errorf("resource does not have a URL associated with it (has it been deployed?)") + return errors.New("resource does not have a URL associated with it (has it been deployed?)") } return browser.OpenURL(url) diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 3bcebddd5..df35d7222 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -3,6 +3,7 @@ package bundle import ( "context" "encoding/json" + "errors" "fmt" "github.com/databricks/cli/bundle" @@ -48,7 +49,7 @@ func resolveRunArgument(ctx context.Context, b *bundle.Bundle, args []string) (s } if len(args) < 1 { - return "", nil, fmt.Errorf("expected a KEY of the resource to run") + return "", nil, errors.New("expected a KEY of the resource to run") } return args[0], args[1:], nil diff --git a/cmd/bundle/test.go b/cmd/bundle/test.go index 4d30e727d..794575220 100644 --- a/cmd/bundle/test.go +++ b/cmd/bundle/test.go @@ -1,7 +1,7 @@ package bundle import ( - "fmt" + "errors" "github.com/spf13/cobra" ) @@ -17,7 +17,7 @@ func newTestCommand() *cobra.Command { } cmd.RunE = func(cmd *cobra.Command, args []string) error { - return fmt.Errorf("TODO") + return errors.New("TODO") // results := project.RunPythonOnDev(cmd.Context(), `return 1`) // if results.Failed() { // return results.Err() diff --git a/cmd/bundle/validate.go b/cmd/bundle/validate.go index 3b50cc258..41fa87f30 100644 --- a/cmd/bundle/validate.go +++ b/cmd/bundle/validate.go @@ -2,6 +2,7 @@ package bundle import ( "encoding/json" + "errors" "fmt" "github.com/databricks/cli/bundle" @@ -10,18 +11,17 @@ import ( "github.com/databricks/cli/bundle/render" "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" - "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/flags" "github.com/spf13/cobra" ) -func renderJsonOutput(cmd *cobra.Command, b *bundle.Bundle, diags diag.Diagnostics) error { +func renderJsonOutput(cmd *cobra.Command, b *bundle.Bundle) error { buf, err := json.MarshalIndent(b.Config.Value().AsAny(), "", " ") if err != nil { return err } _, _ = cmd.OutOrStdout().Write(buf) - return diags.Error() + return nil } func newValidateCommand() *cobra.Command { @@ -39,7 +39,7 @@ func newValidateCommand() *cobra.Command { if err := diags.Error(); err != nil { return diags.Error() } else { - return fmt.Errorf("invariant failed: returned bundle is nil") + return errors.New("invariant failed: returned bundle is nil") } } @@ -65,7 +65,23 @@ func newValidateCommand() *cobra.Command { return nil case flags.OutputJSON: - return renderJsonOutput(cmd, b, diags) + renderOpts := render.RenderOptions{RenderSummaryTable: false} + err1 := render.RenderDiagnostics(cmd.ErrOrStderr(), b, diags, renderOpts) + err2 := renderJsonOutput(cmd, b) + + if err2 != nil { + return err2 + } + + if err1 != nil { + return err1 + } + + if diags.HasError() { + return root.ErrAlreadyPrinted + } + + return nil default: return fmt.Errorf("unknown output type %s", root.OutputType(cmd)) } diff --git a/cmd/cmd.go b/cmd/cmd.go index 5b53a4ae5..5d835409f 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -15,7 +15,6 @@ import ( "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/workspace" - "github.com/databricks/cli/cmd/workspace/apps" "github.com/spf13/cobra" ) @@ -68,7 +67,6 @@ func New(ctx context.Context) *cobra.Command { // Add other subcommands. 
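The validate.go hunk above changes --output json so diagnostics are rendered to stderr, the resolved configuration is written to stdout, and errors surface through the root.ErrAlreadyPrinted sentinel instead of leaking into the JSON stream. A self-contained sketch of that output contract; errAlreadyPrinted stands in for root.ErrAlreadyPrinted:

package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
)

// errAlreadyPrinted signals that the failure was already shown on stderr, so
// the caller should set a non-zero exit code without printing anything else.
var errAlreadyPrinted = errors.New("already printed")

func renderJSON(cfg any, diagnostics []string, hasError bool) error {
	for _, d := range diagnostics {
		fmt.Fprintln(os.Stderr, d) // diagnostics never pollute the JSON on stdout
	}
	buf, err := json.MarshalIndent(cfg, "", "  ")
	if err != nil {
		return err
	}
	_, _ = os.Stdout.Write(buf)
	if hasError {
		return errAlreadyPrinted
	}
	return nil
}

func main() {
	_ = renderJSON(map[string]string{"bundle": "example"}, []string{"Warning: unknown field"}, false)
}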
cli.AddCommand(api.New()) - cli.AddCommand(apps.New()) cli.AddCommand(auth.New()) cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) diff --git a/cmd/configure/configure.go b/cmd/configure/configure.go index 895a5902c..4a6568d06 100644 --- a/cmd/configure/configure.go +++ b/cmd/configure/configure.go @@ -1,6 +1,7 @@ package configure import ( + "errors" "fmt" "github.com/databricks/cli/libs/cmdio" @@ -62,12 +63,12 @@ func configureInteractive(cmd *cobra.Command, flags *configureFlags, cfg *config func configureNonInteractive(cmd *cobra.Command, flags *configureFlags, cfg *config.Config) error { if cfg.Host == "" { - return fmt.Errorf("host must be set in non-interactive mode") + return errors.New("host must be set in non-interactive mode") } // Check presence of cluster ID before reading token to fail fast. if flags.ConfigureCluster && cfg.ClusterID == "" { - return fmt.Errorf("cluster ID must be set in non-interactive mode") + return errors.New("cluster ID must be set in non-interactive mode") } // Read token from stdin if not already set. diff --git a/cmd/configure/configure_test.go b/cmd/configure/configure_test.go index e2f6c1e29..14eb0674a 100644 --- a/cmd/configure/configure_test.go +++ b/cmd/configure/configure_test.go @@ -148,9 +148,9 @@ func TestEnvVarsConfigureNoInteractive(t *testing.T) { // We should only save host and token for a profile, other env variables should not be saved _, err = defaultSection.GetKey("auth_type") - assert.NotNil(t, err) + assert.Error(t, err) _, err = defaultSection.GetKey("metadata_service_url") - assert.NotNil(t, err) + assert.Error(t, err) } func TestEnvVarsConfigureNoArgsNoInteractive(t *testing.T) { diff --git a/cmd/configure/host.go b/cmd/configure/host.go index 781c12387..0a454c6d1 100644 --- a/cmd/configure/host.go +++ b/cmd/configure/host.go @@ -1,7 +1,7 @@ package configure import ( - "fmt" + "errors" "net/url" ) @@ -11,10 +11,10 @@ func validateHost(s string) error { return err } if u.Host == "" || u.Scheme != "https" { - return fmt.Errorf("must start with https://") + return errors.New("must start with https://") } if u.Path != "" && u.Path != "/" { - return fmt.Errorf("must use empty path") + return errors.New("must use empty path") } return nil } diff --git a/cmd/labs/github/ref_test.go b/cmd/labs/github/ref_test.go index cc27d1e81..9668cd7ec 100644 --- a/cmd/labs/github/ref_test.go +++ b/cmd/labs/github/ref_test.go @@ -7,14 +7,15 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestFileFromRef(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/databrickslabs/ucx/main/README.md" { _, err := w.Write([]byte(`abc`)) - require.NoError(t, err) + if !assert.NoError(t, err) { + return + } return } t.Logf("Requested: %s", r.URL.Path) @@ -34,7 +35,9 @@ func TestDownloadZipball(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/repos/databrickslabs/ucx/zipball/main" { _, err := w.Write([]byte(`abc`)) - require.NoError(t, err) + if !assert.NoError(t, err) { + return + } return } t.Logf("Requested: %s", r.URL.Path) diff --git a/cmd/labs/github/releases_test.go b/cmd/labs/github/releases_test.go index 9c3d7a959..93ac33aee 100644 --- a/cmd/labs/github/releases_test.go +++ b/cmd/labs/github/releases_test.go @@ -7,14 +7,15 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func 
TestLoadsReleasesForCLI(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/repos/databricks/cli/releases" { _, err := w.Write([]byte(`[{"tag_name": "v1.2.3"}, {"tag_name": "v1.2.2"}]`)) - require.NoError(t, err) + if !assert.NoError(t, err) { + return + } return } t.Logf("Requested: %s", r.URL.Path) diff --git a/cmd/labs/github/repositories.go b/cmd/labs/github/repositories.go index 850cdb1cb..afdf7aeb2 100644 --- a/cmd/labs/github/repositories.go +++ b/cmd/labs/github/repositories.go @@ -12,7 +12,7 @@ import ( const repositoryCacheTTL = 24 * time.Hour func NewRepositoryCache(org, cacheDir string) *repositoryCache { - filename := fmt.Sprintf("%s-repositories", org) + filename := org + "-repositories" return &repositoryCache{ cache: localcache.NewLocalCache[Repositories](cacheDir, filename, repositoryCacheTTL), Org: org, diff --git a/cmd/labs/github/repositories_test.go b/cmd/labs/github/repositories_test.go index 412b440bc..29ec2ce03 100644 --- a/cmd/labs/github/repositories_test.go +++ b/cmd/labs/github/repositories_test.go @@ -7,14 +7,13 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestRepositories(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/users/databrickslabs/repos" { _, err := w.Write([]byte(`[{"name": "x"}]`)) - require.NoError(t, err) + assert.NoError(t, err) return } t.Logf("Requested: %s", r.URL.Path) @@ -28,5 +27,5 @@ func TestRepositories(t *testing.T) { r := NewRepositoryCache("databrickslabs", t.TempDir()) all, err := r.Load(ctx) assert.NoError(t, err) - assert.True(t, len(all) > 0) + assert.NotEmpty(t, all) } diff --git a/cmd/labs/installed.go b/cmd/labs/installed.go index e4249c9ff..9982cc1f0 100644 --- a/cmd/labs/installed.go +++ b/cmd/labs/installed.go @@ -1,6 +1,7 @@ package labs import ( + "errors" "fmt" "github.com/databricks/cli/cmd/labs/project" @@ -49,7 +50,7 @@ func newInstalledCommand() *cobra.Command { }) } if len(info.Projects) == 0 { - return fmt.Errorf("no projects installed") + return errors.New("no projects installed") } return cmdio.Render(ctx, info) }, diff --git a/cmd/labs/localcache/jsonfile.go b/cmd/labs/localcache/jsonfile.go index 6540e4ac2..50ed372f5 100644 --- a/cmd/labs/localcache/jsonfile.go +++ b/cmd/labs/localcache/jsonfile.go @@ -93,7 +93,7 @@ func (r *LocalCache[T]) writeCache(ctx context.Context, data T) (T, error) { } func (r *LocalCache[T]) FileName() string { - return filepath.Join(r.dir, fmt.Sprintf("%s.json", r.name)) + return filepath.Join(r.dir, r.name+".json") } func (r *LocalCache[T]) loadCache() (*cached[T], error) { diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go index 0d852174c..8172b7d14 100644 --- a/cmd/labs/localcache/jsonfile_test.go +++ b/cmd/labs/localcache/jsonfile_test.go @@ -3,7 +3,6 @@ package localcache import ( "context" "errors" - "fmt" "net/url" "runtime" "testing" @@ -22,7 +21,7 @@ func TestCreatesDirectoryIfNeeded(t *testing.T) { } first, err := c.Load(ctx, tick) assert.NoError(t, err) - assert.Equal(t, first, int64(1)) + assert.Equal(t, int64(1), first) } func TestImpossibleToCreateDir(t *testing.T) { @@ -115,7 +114,7 @@ func TestFolderDisappears(t *testing.T) { func TestRefreshFails(t *testing.T) { c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) tick := func() (int64, error) { - return 0, fmt.Errorf("nope") + return 0, errors.New("nope") } 
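The conversion just above, fmt.Errorf("nope") to errors.New("nope"), is the pattern applied throughout this PR: errors.New for constant messages, fmt.Errorf only when formatting or wrapping is actually needed. A small self-contained example of the distinction:

package main

import (
	"errors"
	"fmt"
)

// A constant message needs no formatting machinery.
var errNoProjects = errors.New("no projects installed")

func load(name string) error {
	if name == "" {
		return errNoProjects
	}
	if name == "broken" {
		// Wrapping with %w keeps errors.Is and errors.As working for callers.
		return fmt.Errorf("load %q: %w", name, errNoProjects)
	}
	return nil
}

func main() {
	fmt.Println(errors.Is(load("broken"), errNoProjects)) // true
}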
ctx := context.Background() _, err := c.Load(ctx, tick) diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go index 041415964..7d31623bb 100644 --- a/cmd/labs/project/installer.go +++ b/cmd/labs/project/installer.go @@ -175,7 +175,7 @@ func (i *installer) login(ctx context.Context) (*databricks.WorkspaceClient, err return nil, fmt.Errorf("valid: %w", err) } if !i.HasAccountLevelCommands() && cfg.IsAccountClient() { - return nil, fmt.Errorf("got account-level client, but no account-level commands") + return nil, errors.New("got account-level client, but no account-level commands") } lc := &loginConfig{Entrypoint: i.Installer.Entrypoint} w, err := lc.askWorkspace(ctx, cfg) @@ -200,10 +200,10 @@ func (i *installer) downloadLibrary(ctx context.Context) error { libTarget := i.LibDir() // we may support wheels, jars, and golang binaries. but those are not zipballs if i.IsZipball() { - feedback <- fmt.Sprintf("Downloading and unpacking zipball for %s", i.version) + feedback <- "Downloading and unpacking zipball for " + i.version return i.downloadAndUnpackZipball(ctx, libTarget) } - return fmt.Errorf("we only support zipballs for now") + return errors.New("we only support zipballs for now") } func (i *installer) downloadAndUnpackZipball(ctx context.Context, libTarget string) error { @@ -234,7 +234,7 @@ func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databr log.Debugf(ctx, "Detected Python %s at: %s", py.Version, py.Path) venvPath := i.virtualEnvPath(ctx) log.Debugf(ctx, "Creating Python Virtual Environment at: %s", venvPath) - feedback <- fmt.Sprintf("Creating Virtual Environment with Python %s", py.Version) + feedback <- "Creating Virtual Environment with Python " + py.Version _, err = process.Background(ctx, []string{py.Path, "-m", "venv", venvPath}) if err != nil { return fmt.Errorf("create venv: %w", err) @@ -251,8 +251,8 @@ func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databr if !ok { return fmt.Errorf("unsupported runtime: %s", cluster.SparkVersion) } - feedback <- fmt.Sprintf("Installing Databricks Connect v%s", runtimeVersion) - pipSpec := fmt.Sprintf("databricks-connect==%s", runtimeVersion) + feedback <- "Installing Databricks Connect v" + runtimeVersion + pipSpec := "databricks-connect==" + runtimeVersion err = i.installPythonDependencies(ctx, pipSpec) if err != nil { return fmt.Errorf("dbconnect: %w", err) diff --git a/cmd/labs/project/installer_test.go b/cmd/labs/project/installer_test.go index a69389b31..a01ba864a 100644 --- a/cmd/labs/project/installer_test.go +++ b/cmd/labs/project/installer_test.go @@ -26,6 +26,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -169,17 +170,17 @@ func TestInstallerWorksForReleases(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/databrickslabs/blueprint/v0.3.15/labs.yml" { raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") - require.NoError(t, err) + assert.NoError(t, err) _, err = w.Write(raw) - require.NoError(t, err) + assert.NoError(t, err) return } if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.3.15" { raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") - require.NoError(t, err) + 
assert.NoError(t, err) w.Header().Add("Content-Type", "application/octet-stream") _, err = w.Write(raw) - require.NoError(t, err) + assert.NoError(t, err) return } if r.URL.Path == "/api/2.1/clusters/get" { @@ -376,17 +377,17 @@ func TestUpgraderWorksForReleases(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path == "/databrickslabs/blueprint/v0.4.0/labs.yml" { raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") - require.NoError(t, err) + assert.NoError(t, err) _, err = w.Write(raw) - require.NoError(t, err) + assert.NoError(t, err) return } if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.4.0" { raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") - require.NoError(t, err) + assert.NoError(t, err) w.Header().Add("Content-Type", "application/octet-stream") _, err = w.Write(raw) - require.NoError(t, err) + assert.NoError(t, err) return } if r.URL.Path == "/api/2.1/clusters/get" { diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py index 6873257d5..a162da342 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -1 +1 @@ -print(f'setting up important infrastructure') +print(f"setting up important infrastructure") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py index 769ee73ee..e5866d6ae 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -2,26 +2,34 @@ import os, sys, json payload = json.loads(sys.argv[1]) -if 'echo' == payload['command']: - json.dump({ - 'command': payload['command'], - 'flags': payload['flags'], - 'env': {k:v for k,v in os.environ.items()} - }, sys.stdout) +if "echo" == payload["command"]: + json.dump( + { + "command": payload["command"], + "flags": payload["flags"], + "env": {k: v for k, v in os.environ.items()}, + }, + sys.stdout, + ) sys.exit(0) -if 'table' == payload['command']: +if "table" == payload["command"]: sys.stderr.write("some intermediate info\n") - json.dump({'records': [ - {'key': 'First', 'value': 'Second'}, - {'key': 'Third', 'value': 'Fourth'}, - ]}, sys.stdout) + json.dump( + { + "records": [ + {"key": "First", "value": "Second"}, + {"key": "Third", "value": "Fourth"}, + ] + }, + sys.stdout, + ) sys.exit(0) -print(f'host is {os.environ["DATABRICKS_HOST"]}') +print(f"host is {os.environ['DATABRICKS_HOST']}") -print(f'[{payload["command"]}] command flags are {payload["flags"]}') +print(f"[{payload['command']}] command flags are {payload['flags']}") -answer = input('What is your name? ') +answer = input("What is your name? 
") -print(f'Hello, {answer}!') +print(f"Hello, {answer}!") diff --git a/cmd/labs/show.go b/cmd/labs/show.go index c36f0bda3..e8c876d8b 100644 --- a/cmd/labs/show.go +++ b/cmd/labs/show.go @@ -1,7 +1,7 @@ package labs import ( - "fmt" + "errors" "github.com/databricks/cli/cmd/labs/project" "github.com/databricks/cli/cmd/root" @@ -34,7 +34,7 @@ func newShowCommand() *cobra.Command { return err } if len(installed) == 0 { - return fmt.Errorf("no projects found") + return errors.New("no projects found") } name := args[0] for _, v := range installed { diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 07ab48399..49abfd414 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -26,7 +26,7 @@ type ErrNoWorkspaceProfiles struct { } func (e ErrNoWorkspaceProfiles) Error() string { - return fmt.Sprintf("%s does not contain workspace profiles; please create one by running 'databricks configure'", e.path) + return e.path + " does not contain workspace profiles; please create one by running 'databricks configure'" } type ErrNoAccountProfiles struct { @@ -34,7 +34,7 @@ type ErrNoAccountProfiles struct { } func (e ErrNoAccountProfiles) Error() string { - return fmt.Sprintf("%s does not contain account profiles", e.path) + return e.path + " does not contain account profiles" } func initProfileFlag(cmd *cobra.Command) { @@ -253,7 +253,7 @@ func AskForWorkspaceProfile(ctx context.Context) (string, error) { return profiles[0].Name, nil } i, _, err := cmdio.RunSelect(ctx, &promptui.Select{ - Label: fmt.Sprintf("Workspace profiles defined in %s", path), + Label: "Workspace profiles defined in " + path, Items: profiles, Searcher: profiles.SearchCaseInsensitive, StartInSearchMode: true, @@ -287,7 +287,7 @@ func AskForAccountProfile(ctx context.Context) (string, error) { return profiles[0].Name, nil } i, _, err := cmdio.RunSelect(ctx, &promptui.Select{ - Label: fmt.Sprintf("Account profiles defined in %s", path), + Label: "Account profiles defined in " + path, Items: profiles, Searcher: profiles.SearchCaseInsensitive, StartInSearchMode: true, diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index 1458de13a..0cc49b2ac 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -2,7 +2,7 @@ package root import ( "context" - "fmt" + "errors" "os" "github.com/databricks/cli/libs/cmdio" @@ -37,7 +37,7 @@ func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Con if f.log.level.String() != "disabled" && f.log.file.String() == "stderr" && f.ProgressLogFormat == flags.ModeInplace { - return nil, fmt.Errorf("inplace progress logging cannot be used when log-file is stderr") + return nil, errors.New("inplace progress logging cannot be used when log-file is stderr") } format := f.ProgressLogFormat diff --git a/cmd/sync/completion.go b/cmd/sync/completion.go index 422147713..5a65dd528 100644 --- a/cmd/sync/completion.go +++ b/cmd/sync/completion.go @@ -2,7 +2,6 @@ package sync import ( "context" - "fmt" "path" "strings" @@ -52,8 +51,8 @@ func completeRemotePath( } prefixes := []string{ - path.Clean(fmt.Sprintf("/Users/%s", me.UserName)) + "/", - path.Clean(fmt.Sprintf("/Repos/%s", me.UserName)) + "/", + path.Clean("/Users/"+me.UserName) + "/", + path.Clean("/Repos/"+me.UserName) + "/", } validPrefix := false diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index cd2167a19..dea40f96a 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -2,6 +2,7 @@ package sync import ( "context" + "errors" "flag" "fmt" "io" @@ -29,7 +30,7 @@ type syncFlags struct { func (f 
*syncFlags) syncOptionsFromBundle(cmd *cobra.Command, args []string, b *bundle.Bundle) (*sync.SyncOptions, error) { if len(args) > 0 { - return nil, fmt.Errorf("SRC and DST are not configurable in the context of a bundle") + return nil, errors.New("SRC and DST are not configurable in the context of a bundle") } opts, err := files.GetSyncOptions(cmd.Context(), bundle.ReadOnly(b)) diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index a103ba7a8..f7c08ece1 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -78,6 +78,7 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().BoolVar(&createReq.NoCompute, "no-compute", createReq.NoCompute, `If true, the app will not be started after creation.`) // TODO: complex arg: active_deployment // TODO: complex arg: app_status // TODO: complex arg: compute_status diff --git a/cmd/workspace/apps/overrides.go b/cmd/workspace/apps/overrides.go new file mode 100644 index 000000000..e14068717 --- /dev/null +++ b/cmd/workspace/apps/overrides.go @@ -0,0 +1,28 @@ +package apps + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func listOverride(listCmd *cobra.Command, listReq *apps.ListAppsRequest) { + listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "Name"}} {{header "Url"}} {{header "ComputeStatus"}} {{header "DeploymentStatus"}}`) + listCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.Name | green}} {{.Url}} {{if .ComputeStatus}}{{if eq .ComputeStatus.State "ACTIVE"}}{{green "%s" .ComputeStatus.State }}{{else}}{{blue "%s" .ComputeStatus.State}}{{end}}{{end}} {{if .ActiveDeployment}}{{if eq .ActiveDeployment.Status.State "SUCCEEDED"}}{{green "%s" .ActiveDeployment.Status.State }}{{else}}{{blue "%s" .ActiveDeployment.Status.State}}{{end}}{{end}} + {{end}}`) +} + +func listDeploymentsOverride(listDeploymentsCmd *cobra.Command, listDeploymentsReq *apps.ListAppDeploymentsRequest) { + listDeploymentsCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "DeploymentId"}} {{header "State"}} {{header "CreatedAt"}}`) + listDeploymentsCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.DeploymentId}} {{if eq .Status.State "SUCCEEDED"}}{{green "%s" .Status.State }}{{else}}{{blue "%s" .Status.State}}{{end}} {{.CreateTime}} + {{end}}`) +} + +func init() { + listOverrides = append(listOverrides, listOverride) + listDeploymentsOverrides = append(listDeploymentsOverrides, listDeploymentsOverride) +} diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index b067937e2..38a88f014 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -625,11 +625,19 @@ func newGet() *cobra.Command { // TODO: short flags + cmd.Flags().StringVar(&getReq.PageToken, "page-token", getReq.PageToken, `Use next_page_token returned from the previous GetJob to request the next page of the job's sub-resources.`) + cmd.Use = "get JOB_ID" cmd.Short = `Get a single job.` cmd.Long = `Get a single job. Retrieves the details for a single job. + + In Jobs API 2.2, requests for a single job support pagination of tasks and + job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. 
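The help text above describes Jobs API 2.2 pagination of tasks and job_clusters. A hedged sketch of the token loop it implies; jobPage and fetchPage are hypothetical stand-ins rather than SDK types, and only the next_page_token to page_token handoff is the point:

package main

import "fmt"

// jobPage and fetchPage are illustrative stand-ins for a paginated GetJob or
// GetRun response and request; real code would call the Jobs API with the
// --page-token value shown above.
type jobPage struct {
	Tasks         []string
	NextPageToken string
}

func fetchPage(token string) jobPage {
	if token == "" {
		return jobPage{Tasks: []string{"task_001", "task_100"}, NextPageToken: "page-2"}
	}
	return jobPage{Tasks: []string{"task_101"}} // last page: empty NextPageToken
}

func main() {
	var tasks []string
	token := ""
	for {
		page := fetchPage(token)
		tasks = append(tasks, page.Tasks...)
		if page.NextPageToken == "" {
			break
		}
		token = page.NextPageToken
	}
	fmt.Println(len(tasks), "tasks collected across pages")
}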
Arguments: JOB_ID: The canonical identifier of the job to retrieve information about. This @@ -847,13 +855,19 @@ func newGetRun() *cobra.Command { cmd.Flags().BoolVar(&getRunReq.IncludeHistory, "include-history", getRunReq.IncludeHistory, `Whether to include the repair history in the response.`) cmd.Flags().BoolVar(&getRunReq.IncludeResolvedValues, "include-resolved-values", getRunReq.IncludeResolvedValues, `Whether to include resolved parameter values in the response.`) - cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `To list the next page of job tasks, set this field to the value of the next_page_token returned in the GetJob response.`) + cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `Use next_page_token returned from the previous GetRun to request the next page of the run's sub-resources.`) cmd.Use = "get-run RUN_ID" cmd.Short = `Get a single job run.` cmd.Long = `Get a single job run. - Retrieve the metadata of a run. + Retrieves the metadata of a run. + + In Jobs API 2.2, requests for a single job run support pagination of tasks + and job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: RUN_ID: The canonical identifier of the run for which to retrieve the metadata. diff --git a/cmd/workspace/pipelines/pipelines.go b/cmd/workspace/pipelines/pipelines.go index 38636e83b..e94d4c5a8 100755 --- a/cmd/workspace/pipelines/pipelines.go +++ b/cmd/workspace/pipelines/pipelines.go @@ -974,6 +974,7 @@ func newUpdate() *cobra.Command { cmd.Flags().BoolVar(&updateReq.Photon, "photon", updateReq.Photon, `Whether Photon is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.PipelineId, "pipeline-id", updateReq.PipelineId, `Unique identifier for this pipeline.`) // TODO: complex arg: restart_window + // TODO: complex arg: run_as cmd.Flags().StringVar(&updateReq.Schema, "schema", updateReq.Schema, `The default schema (database) where tables are read from or published to.`) cmd.Flags().BoolVar(&updateReq.Serverless, "serverless", updateReq.Serverless, `Whether serverless compute is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.Storage, "storage", updateReq.Storage, `DBFS root directory for storing checkpoints and tables.`) diff --git a/cmd/workspace/repos/overrides.go b/cmd/workspace/repos/overrides.go index aad38ecc7..561921623 100644 --- a/cmd/workspace/repos/overrides.go +++ b/cmd/workspace/repos/overrides.go @@ -2,6 +2,7 @@ package repos import ( "context" + "errors" "fmt" "strconv" @@ -153,7 +154,7 @@ func repoArgumentToRepoID(ctx context.Context, w *databricks.WorkspaceClient, ar args = append(args, id) } if len(args) != 1 { - return 0, fmt.Errorf("expected to have the id for the corresponding repo to access") + return 0, errors.New("expected to have the id for the corresponding repo to access") } // ---- End copy from cmd/workspace/repos/repos.go ---- diff --git a/cmd/workspace/secrets/put_secret.go b/cmd/workspace/secrets/put_secret.go index f24814f05..b446524f7 100644 --- a/cmd/workspace/secrets/put_secret.go +++ b/cmd/workspace/secrets/put_secret.go @@ -2,7 +2,7 @@ package secrets import ( "encoding/base64" - "fmt" + "errors" "io" "os" @@ -67,7 +67,7 @@ func newPutSecret() *cobra.Command { bytesValueChanged := cmd.Flags().Changed("bytes-value") stringValueChanged := cmd.Flags().Changed("string-value") 
if bytesValueChanged && stringValueChanged { - return fmt.Errorf("cannot specify both --bytes-value and --string-value") + return errors.New("cannot specify both --bytes-value and --string-value") } if cmd.Flags().Changed("json") { diff --git a/cmd/workspace/shares/shares.go b/cmd/workspace/shares/shares.go index f70963f29..62c3407f4 100755 --- a/cmd/workspace/shares/shares.go +++ b/cmd/workspace/shares/shares.go @@ -391,6 +391,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `User-provided free-form text description.`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the share.`) + cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of current owner of share.`) cmd.Flags().StringVar(&updateReq.StorageRoot, "storage-root", updateReq.StorageRoot, `Storage root URL for the share.`) // TODO: array: updates diff --git a/cmd/workspace/workspace/overrides.go b/cmd/workspace/workspace/overrides.go index 216e9b5d8..53438a764 100644 --- a/cmd/workspace/workspace/overrides.go +++ b/cmd/workspace/workspace/overrides.go @@ -36,7 +36,7 @@ func exportOverride(exportCmd *cobra.Command, exportReq *workspace.ExportRequest ctx := cmd.Context() w := root.WorkspaceClient(ctx) if len(args) != 1 { - return fmt.Errorf("expected to have the absolute path of the object or directory") + return errors.New("expected to have the absolute path of the object or directory") } exportReq.Path = args[0] diff --git a/go.mod b/go.mod index 2dda0cd60..930963f89 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,14 @@ go 1.23 toolchain go1.23.4 require ( + github.com/BurntSushi/toml v1.4.0 // MIT github.com/Masterminds/semver/v3 v3.3.1 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.54.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.55.0 // Apache 2.0 github.com/fatih/color v1.18.0 // MIT github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 - github.com/hashicorp/hc-install v0.9.0 // MPL 2.0 + github.com/hashicorp/hc-install v0.9.1 // MPL 2.0 github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 github.com/hashicorp/terraform-json v0.23.0 // MPL 2.0 github.com/hexops/gotextdiff v1.0.3 // BSD 3-Clause "New" or "Revised" License @@ -26,9 +27,9 @@ require ( github.com/wI2L/jsondiff v0.6.1 // MIT golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/mod v0.22.0 - golang.org/x/oauth2 v0.24.0 + golang.org/x/oauth2 v0.25.0 golang.org/x/sync v0.10.0 - golang.org/x/term v0.27.0 + golang.org/x/term v0.28.0 golang.org/x/text v0.21.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/yaml.v3 v3.0.1 @@ -38,7 +39,7 @@ require ( cloud.google.com/go/auth v0.4.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect - github.com/ProtonMail/go-crypto v1.1.0-alpha.2 // indirect + github.com/ProtonMail/go-crypto v1.1.3 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/cloudflare/circl v1.3.7 // indirect @@ -68,8 +69,8 @@ require ( go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect - golang.org/x/net v0.26.0 // indirect - golang.org/x/sys v0.28.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/sys v0.29.0 // indirect 
golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/go.sum b/go.sum index 1e806ea03..d025b3947 100644 --- a/go.sum +++ b/go.sum @@ -8,12 +8,14 @@ cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1h dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= +github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/ProtonMail/go-crypto v1.1.3 h1:nRBOetoydLeUb4nHajyO2bKqMLfWQ/ZPwkXqXxPxCFk= +github.com/ProtonMail/go-crypto v1.1.3/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= github.com/briandowns/spinner v1.23.1 h1:t5fDPmScwUjozhDj4FA46p5acZWIPXYE30qW2Ptu650= @@ -30,10 +32,10 @@ github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vc github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= -github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.54.0 h1:L8gsA3NXs+uYU3QtW/OUgjxMQxOH24k0MT9JhB3zLlM= -github.com/databricks/databricks-sdk-go v0.54.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= +github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/databricks/databricks-sdk-go v0.55.0 h1:ReziD6spzTDltM0ml80LggKo27F3oUjgTinCFDJDnak= +github.com/databricks/databricks-sdk-go v0.55.0/go.mod h1:JpLizplEs+up9/Z4Xf2x++o3sM9eTTWFGzIXAptKJzI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -50,10 +52,10 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= 
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= -github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU= -github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= -github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= -github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= +github.com/go-git/go-billy/v5 v5.6.0 h1:w2hPNtoehvJIxR00Vb4xX94qHQi/ApZfX+nBE2Cjio8= +github.com/go-git/go-billy/v5 v5.6.0/go.mod h1:sFDq7xD3fn3E0GOwUSZqHo9lrkmx8xJhA0ZrfvjBRGM= +github.com/go-git/go-git/v5 v5.13.0 h1:vLn5wlGIh/X78El6r3Jr+30W16Blk0CTcxTYcYPWi5E= +github.com/go-git/go-git/v5 v5.13.0/go.mod h1:Wjo7/JyVKtQgUNdXYXIepzWfJQkUEIGvkvVkiXRR/zw= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -103,8 +105,8 @@ github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISH github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/hc-install v0.9.0 h1:2dIk8LcvANwtv3QZLckxcjyF5w8KVtiMxu6G6eLhghE= -github.com/hashicorp/hc-install v0.9.0/go.mod h1:+6vOP+mf3tuGgMApVYtmsnDoKWMDcFXeTxCACYZ8SFg= +github.com/hashicorp/hc-install v0.9.1 h1:gkqTfE3vVbafGQo6VZXcy2v5yoz2bE0+nhZXruCuODQ= +github.com/hashicorp/hc-install v0.9.1/go.mod h1:pWWvN/IrfeBK4XPeXXYkL6EjMufHkCK5DvwxeLKuBf0= github.com/hashicorp/terraform-exec v0.21.0 h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= github.com/hashicorp/terraform-json v0.23.0 h1:sniCkExU4iKtTADReHzACkk8fnpQXrdD2xoR+lppBkI= @@ -141,8 +143,8 @@ github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDj github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= -github.com/skeema/knownhosts v1.2.2 h1:Iug2P4fLmDw9f41PB6thxUkNUkJzB5i+1/exaj40L3A= -github.com/skeema/knownhosts v1.2.2/go.mod h1:xYbVRSPxqBZFrdmDyMmsOs+uX1UZC3nTN3ThzgDxUwo= +github.com/skeema/knownhosts v1.3.0 h1:AM+y0rI04VksttfwjkSTNQorvGqmwATnvnAHpSgc0LY= +github.com/skeema/knownhosts v1.3.0/go.mod h1:sPINvnADmT/qYH1kfv+ePMmOBTH6Tbl7b5LvTDjFK7M= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -204,11 +206,11 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= -golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= +golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -224,10 +226,10 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= +golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= diff --git a/integration/assumptions/main_test.go b/integration/assumptions/main_test.go deleted file mode 100644 index be2761385..000000000 --- a/integration/assumptions/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package assumptions_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/bundle/apps_test.go b/integration/bundle/apps_test.go new file mode 100644 index 000000000..23cd784be --- /dev/null +++ b/integration/bundle/apps_test.go @@ -0,0 +1,139 @@ +package bundle_test + +import ( + "fmt" + "io" + "testing" + + "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestDeployBundleWithApp(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + // TODO: should only skip app run when app can be created with no_compute option. + if testing.Short() { + t.Log("Skip the app creation and run in short mode") + return + } + + if testutil.GetCloud(t) == testutil.GCP { + t.Skip("Skipping test for GCP cloud because /api/2.0/apps is temporarily unavailable there.") + } + + uniqueId := uuid.New().String() + appId := "app-" + uuid.New().String()[0:8] + nodeTypeId := testutil.GetCloud(t).NodeTypeID() + instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") + + root := initTestTemplate(t, ctx, "apps", map[string]any{ + "unique_id": uniqueId, + "app_id": appId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + "instance_pool_id": instancePoolId, + }) + + t.Cleanup(func() { + destroyBundle(t, ctx, root) + app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: "test-app"}) + if err != nil { + require.ErrorContains(t, err, "does not exist") + } else { + require.Contains(t, []apps.ApplicationState{apps.ApplicationStateUnavailable}, app.AppStatus.State) + } + }) + + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + + user, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/apps/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/apps/bundle_deploy.txt"), + ) + + // App should exists after bundle deployment + app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + + // Check app config + currentUser, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + pathToAppYml := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s/files/app/app.yml", currentUser.UserName, uniqueId) + reader, err := wt.W.Workspace.Download(ctx, pathToAppYml) + require.NoError(t, err) + + data, err := io.ReadAll(reader) + require.NoError(t, err) + + job, err := wt.W.Jobs.GetBySettingsName(ctx, "test-job-with-cluster-"+uniqueId) + require.NoError(t, err) + + content := string(data) + require.Contains(t, content, fmt.Sprintf(`command: + - flask + - --app + - app + - run +env: + - name: JOB_ID + value: "%d"`, job.JobId)) + + // Try to run the app + _, out := runResourceWithStderr(t, ctx, root, "test_app") + require.Contains(t, out, app.Url) + + // App 
should be in the running state + app, err = wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateRunning, app.AppStatus.State) + + // Stop the app + wait, err := wt.W.Apps.Stop(ctx, apps.StopAppRequest{Name: appId}) + require.NoError(t, err) + app, err = wait.Get() + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateUnavailable, app.AppStatus.State) + + // Try to run the app again + _, out = runResourceWithStderr(t, ctx, root, "test_app") + require.Contains(t, out, app.Url) + + // App should be in the running state + app, err = wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateRunning, app.AppStatus.State) + + // Redeploy it again just to check that it can be redeployed + deployBundle(t, ctx, root) +} diff --git a/integration/bundle/artifacts_test.go b/integration/bundle/artifacts_test.go index 1b71a1c3d..94b96899e 100644 --- a/integration/bundle/artifacts_test.go +++ b/integration/bundle/artifacts_test.go @@ -85,13 +85,13 @@ func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) { // The remote path attribute on the artifact file should have been set. require.Regexp(t, - regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), + path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`), b.Config.Artifacts["test"].Files[0].RemotePath, ) // The task library path should have been updated to the remote path. require.Regexp(t, - regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), + path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`), b.Config.Resources.Jobs["test"].JobSettings.Tasks[0].Libraries[0].Whl, ) } @@ -149,13 +149,13 @@ func TestUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T) { // The remote path attribute on the artifact file should have been set. require.Regexp(t, - regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), + path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`), b.Config.Artifacts["test"].Files[0].RemotePath, ) // The job environment deps path should have been updated to the remote path. require.Regexp(t, - regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), + path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`), b.Config.Resources.Jobs["test"].JobSettings.Environments[0].Spec.Dependencies[0], ) } @@ -218,13 +218,13 @@ func TestUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) { // The remote path attribute on the artifact file should have been set. require.Regexp(t, - regexp.MustCompile(path.Join(regexp.QuoteMeta(volumePath), `.internal/test\.whl`)), + path.Join(regexp.QuoteMeta(volumePath), `.internal/test\.whl`), b.Config.Artifacts["test"].Files[0].RemotePath, ) // The task library path should have been updated to the remote path. 
require.Regexp(t, - regexp.MustCompile(path.Join(regexp.QuoteMeta(volumePath), `.internal/test\.whl`)), + path.Join(regexp.QuoteMeta(volumePath), `.internal/test\.whl`), b.Config.Resources.Jobs["test"].JobSettings.Tasks[0].Libraries[0].Whl, ) } @@ -257,7 +257,7 @@ func TestUploadArtifactFileToVolumeThatDoesNotExist(t *testing.T) { stdout, stderr, err := testcli.RequireErrorRun(t, ctx, "bundle", "deploy") assert.Error(t, err) - assert.Equal(t, fmt.Sprintf(`Error: volume /Volumes/main/%s/doesnotexist does not exist: Not Found + assert.Equal(t, fmt.Sprintf(`Error: volume main.%s.doesnotexist does not exist at workspace.artifact_path in databricks.yml:6:18 @@ -293,7 +293,7 @@ func TestUploadArtifactToVolumeNotYetDeployed(t *testing.T) { stdout, stderr, err := testcli.RequireErrorRun(t, ctx, "bundle", "deploy") assert.Error(t, err) - assert.Equal(t, fmt.Sprintf(`Error: volume /Volumes/main/%s/my_volume does not exist: Not Found + assert.Equal(t, fmt.Sprintf(`Error: volume main.%s.my_volume does not exist at workspace.artifact_path resources.volumes.foo in databricks.yml:6:18 diff --git a/integration/bundle/bind_resource_test.go b/integration/bundle/bind_resource_test.go index 508aa3410..ba10190aa 100644 --- a/integration/bundle/bind_resource_test.go +++ b/integration/bundle/bind_resource_test.go @@ -1,9 +1,9 @@ package bundle_test import ( - "fmt" "os" "path/filepath" + "strconv" "testing" "github.com/databricks/cli/integration/internal/acc" @@ -35,7 +35,7 @@ func TestBindJobToExistingJob(t *testing.T) { }) ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) - c := testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "foo", fmt.Sprint(jobId), "--auto-approve") + c := testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "foo", strconv.FormatInt(jobId, 10), "--auto-approve") _, _, err := c.Run() require.NoError(t, err) @@ -53,7 +53,7 @@ func TestBindJobToExistingJob(t *testing.T) { JobId: jobId, }) require.NoError(t, err) - require.Equal(t, job.Settings.Name, fmt.Sprintf("test-job-basic-%s", uniqueId)) + require.Equal(t, job.Settings.Name, "test-job-basic-"+uniqueId) require.Contains(t, job.Settings.Tasks[0].SparkPythonTask.PythonFile, "hello_world.py") c = testcli.NewRunner(t, ctx, "bundle", "deployment", "unbind", "foo") @@ -71,7 +71,7 @@ func TestBindJobToExistingJob(t *testing.T) { JobId: jobId, }) require.NoError(t, err) - require.Equal(t, job.Settings.Name, fmt.Sprintf("test-job-basic-%s", uniqueId)) + require.Equal(t, job.Settings.Name, "test-job-basic-"+uniqueId) require.Contains(t, job.Settings.Tasks[0].SparkPythonTask.PythonFile, "hello_world.py") } @@ -96,7 +96,7 @@ func TestAbortBind(t *testing.T) { // Bind should fail because prompting is not possible. 
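The bind-test edits above swap fmt.Sprint and fmt.Sprintf for strconv.FormatInt and plain concatenation. The resulting strings are identical; the stdlib calls simply avoid the reflection path inside fmt. A small equivalence sketch with illustrative values (imports: fmt, strconv):

var jobId int64 = 1234
a := fmt.Sprint(jobId)            // "1234", formatted via reflection
b := strconv.FormatInt(jobId, 10) // "1234", formatted directly as an int64
_ = a == b                        // true

uniqueId := "abc"
_ = "test-job-basic-" + uniqueId // same string as fmt.Sprintf("test-job-basic-%s", uniqueId)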
ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) ctx = env.Set(ctx, "TERM", "dumb") - c := testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "foo", fmt.Sprint(jobId)) + c := testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "foo", strconv.FormatInt(jobId, 10)) // Expect error suggesting to use --auto-approve _, _, err := c.Run() @@ -114,7 +114,7 @@ func TestAbortBind(t *testing.T) { }) require.NoError(t, err) - require.NotEqual(t, job.Settings.Name, fmt.Sprintf("test-job-basic-%s", uniqueId)) + require.NotEqual(t, job.Settings.Name, "test-job-basic-"+uniqueId) require.Contains(t, job.Settings.Tasks[0].NotebookTask.NotebookPath, "test") } @@ -143,7 +143,7 @@ func TestGenerateAndBind(t *testing.T) { ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) c := testcli.NewRunner(t, ctx, "bundle", "generate", "job", "--key", "test_job_key", - "--existing-job-id", fmt.Sprint(jobId), + "--existing-job-id", strconv.FormatInt(jobId, 10), "--config-dir", filepath.Join(bundleRoot, "resources"), "--source-dir", filepath.Join(bundleRoot, "src")) _, _, err = c.Run() @@ -157,7 +157,7 @@ func TestGenerateAndBind(t *testing.T) { require.Len(t, matches, 1) - c = testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "test_job_key", fmt.Sprint(jobId), "--auto-approve") + c = testcli.NewRunner(t, ctx, "bundle", "deployment", "bind", "test_job_key", strconv.FormatInt(jobId, 10), "--auto-approve") _, _, err = c.Run() require.NoError(t, err) diff --git a/integration/bundle/bundles/apps/databricks_template_schema.json b/integration/bundle/bundles/apps/databricks_template_schema.json new file mode 100644 index 000000000..c9faeabf3 --- /dev/null +++ b/integration/bundle/bundles/apps/databricks_template_schema.json @@ -0,0 +1,24 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "app_id": { + "type": "string", + "description": "Unique ID for app name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + }, + "instance_pool_id": { + "type": "string", + "description": "Instance pool id for job cluster" + } + } +} diff --git a/integration/bundle/bundles/apps/template/app/app.py b/integration/bundle/bundles/apps/template/app/app.py new file mode 100644 index 000000000..a60c786fe --- /dev/null +++ b/integration/bundle/bundles/apps/template/app/app.py @@ -0,0 +1,15 @@ +import os + +from databricks.sdk import WorkspaceClient +from flask import Flask + +app = Flask(__name__) + + +@app.route("/") +def home(): + job_id = os.getenv("JOB_ID") + + w = WorkspaceClient() + job = w.jobs.get(job_id) + return job.settings.name diff --git a/integration/bundle/bundles/apps/template/databricks.yml.tmpl b/integration/bundle/bundles/apps/template/databricks.yml.tmpl new file mode 100644 index 000000000..4d862a06f --- /dev/null +++ b/integration/bundle/bundles/apps/template/databricks.yml.tmpl @@ -0,0 +1,42 @@ +bundle: + name: basic + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + apps: + test_app: + name: "{{.app_id}}" + description: "App which manages job created by this bundle" + source_code_path: ./app + config: + command: + - flask + - --app + - app + - run + env: + - name: JOB_ID + value: ${resources.jobs.foo.id} + + resources: + - name: "app-job" + description: "A job for app to be able to work with" + job: + id: ${resources.jobs.foo.id} + permission: "CAN_MANAGE_RUN" + + jobs: + foo: + name: 
test-job-with-cluster-{{.unique_id}} + tasks: + - task_key: my_notebook_task + new_cluster: + num_workers: 1 + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + data_security_mode: USER_ISOLATION + instance_pool_id: "{{.instance_pool_id}}" + spark_python_task: + python_file: ./hello_world.py diff --git a/integration/bundle/bundles/apps/template/hello_world.py b/integration/bundle/bundles/apps/template/hello_world.py new file mode 100644 index 000000000..f301245e2 --- /dev/null +++ b/integration/bundle/bundles/apps/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl index 4ebeb2655..4ea687cf1 100644 --- a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -9,7 +9,6 @@ variables: description: The catalog the DLT pipeline should use. default: main - resources: pipelines: foo: @@ -19,6 +18,13 @@ resources: path: ./nb.sql development: true catalog: ${var.catalog} + target: ${resources.schemas.bar.id} + + schemas: + bar: + name: test-schema-{{.unique_id}} + catalog_name: ${var.catalog} + comment: This schema was created from DABs include: - "*.yml" diff --git a/integration/bundle/clusters_test.go b/integration/bundle/clusters_test.go index 449206208..b94b8365e 100644 --- a/integration/bundle/clusters_test.go +++ b/integration/bundle/clusters_test.go @@ -1,7 +1,6 @@ package bundle_test import ( - "fmt" "testing" "github.com/databricks/cli/integration/internal/acc" @@ -29,7 +28,7 @@ func TestDeployBundleWithCluster(t *testing.T) { t.Cleanup(func() { destroyBundle(t, ctx, root) - cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + cluster, err := wt.W.Clusters.GetByClusterName(ctx, "test-cluster-"+uniqueId) if err != nil { require.ErrorContains(t, err, "does not exist") } else { @@ -40,7 +39,7 @@ func TestDeployBundleWithCluster(t *testing.T) { deployBundle(t, ctx, root) // Cluster should exists after bundle deployment - cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + cluster, err := wt.W.Clusters.GetByClusterName(ctx, "test-cluster-"+uniqueId) require.NoError(t, err) require.NotNil(t, cluster) diff --git a/integration/bundle/dashboards_test.go b/integration/bundle/dashboards_test.go index 83b4b8b03..a96b657f6 100644 --- a/integration/bundle/dashboards_test.go +++ b/integration/bundle/dashboards_test.go @@ -40,7 +40,7 @@ func TestDashboards(t *testing.T) { // Load the dashboard by its ID and confirm its display name. dashboard, err := wt.W.Lakeview.GetByDashboardId(ctx, oi.ResourceId) require.NoError(t, err) - assert.Equal(t, fmt.Sprintf("test-dashboard-%s", uniqueID), dashboard.DisplayName) + assert.Equal(t, "test-dashboard-"+uniqueID, dashboard.DisplayName) // Make an out of band modification to the dashboard and confirm that it is detected. 
_, err = wt.W.Lakeview.Update(ctx, dashboards.UpdateDashboardRequest{ diff --git a/integration/bundle/deploy_test.go b/integration/bundle/deploy_test.go index 0b37e5630..309b82917 100644 --- a/integration/bundle/deploy_test.go +++ b/integration/bundle/deploy_test.go @@ -2,7 +2,6 @@ package bundle_test import ( "context" - "errors" "fmt" "io" "os" @@ -99,7 +98,7 @@ func TestBundleDeployUcSchema(t *testing.T) { // Assert the schema is deleted _, err = w.Schemas.GetByFullName(ctx, strings.Join([]string{catalogName, schemaName}, ".")) apiErr := &apierr.APIError{} - assert.True(t, errors.As(err, &apiErr)) + assert.ErrorAs(t, err, &apiErr) assert.Equal(t, "SCHEMA_DOES_NOT_EXIST", apiErr.ErrorCode) } diff --git a/integration/bundle/deploy_to_shared_test.go b/integration/bundle/deploy_to_shared_test.go index b4395f4c6..387d3c67a 100644 --- a/integration/bundle/deploy_to_shared_test.go +++ b/integration/bundle/deploy_to_shared_test.go @@ -1,7 +1,6 @@ package bundle_test import ( - "fmt" "testing" "github.com/databricks/cli/integration/internal/acc" @@ -23,7 +22,7 @@ func TestDeployBasicToSharedWorkspacePath(t *testing.T) { "unique_id": uniqueId, "node_type_id": nodeTypeId, "spark_version": defaultSparkVersion, - "root_path": fmt.Sprintf("/Shared/%s", currentUser.UserName), + "root_path": "/Shared/" + currentUser.UserName, }) t.Cleanup(func() { diff --git a/integration/bundle/destroy_test.go b/integration/bundle/destroy_test.go index f18138ce5..b69382a58 100644 --- a/integration/bundle/destroy_test.go +++ b/integration/bundle/destroy_test.go @@ -1,7 +1,6 @@ package bundle_test import ( - "errors" "os" "path/filepath" "testing" @@ -71,11 +70,11 @@ func TestBundleDestroy(t *testing.T) { // Assert snapshot file is deleted entries, err = os.ReadDir(snapshotsDir) require.NoError(t, err) - assert.Len(t, entries, 0) + assert.Empty(t, entries) // Assert bundle deployment path is deleted _, err = w.Workspace.GetStatusByPath(ctx, remoteRoot) apiErr := &apierr.APIError{} - assert.True(t, errors.As(err, &apiErr)) + assert.ErrorAs(t, err, &apiErr) assert.Equal(t, "RESOURCE_DOES_NOT_EXIST", apiErr.ErrorCode) } diff --git a/integration/bundle/empty_bundle_test.go b/integration/bundle/empty_bundle_test.go index 1ab240d13..2c650cbef 100644 --- a/integration/bundle/empty_bundle_test.go +++ b/integration/bundle/empty_bundle_test.go @@ -1,7 +1,6 @@ package bundle_test import ( - "fmt" "os" "path/filepath" "testing" @@ -19,8 +18,7 @@ func TestEmptyBundleDeploy(t *testing.T) { f, err := os.Create(filepath.Join(tmpDir, "databricks.yml")) require.NoError(t, err) - bundleRoot := fmt.Sprintf(`bundle: - name: %s`, uuid.New().String()) + bundleRoot := "bundle:\n name: " + uuid.New().String() _, err = f.WriteString(bundleRoot) require.NoError(t, err) f.Close() diff --git a/integration/bundle/generate_job_test.go b/integration/bundle/generate_job_test.go index b68bb7d61..f3c4c7829 100644 --- a/integration/bundle/generate_job_test.go +++ b/integration/bundle/generate_job_test.go @@ -2,10 +2,10 @@ package bundle_test import ( "context" - "fmt" "os" "path" "path/filepath" + "strconv" "strings" "testing" @@ -37,7 +37,7 @@ func TestGenerateFromExistingJobAndDeploy(t *testing.T) { ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) c := testcli.NewRunner(t, ctx, "bundle", "generate", "job", - "--existing-job-id", fmt.Sprint(jobId), + "--existing-job-id", strconv.FormatInt(jobId, 10), "--config-dir", filepath.Join(bundleRoot, "resources"), "--source-dir", filepath.Join(bundleRoot, "src")) _, _, err := c.Run() @@ -55,7 +55,7 @@ func 
TestGenerateFromExistingJobAndDeploy(t *testing.T) { require.NoError(t, err) generatedYaml := string(data) require.Contains(t, generatedYaml, "notebook_task:") - require.Contains(t, generatedYaml, fmt.Sprintf("notebook_path: %s", filepath.Join("..", "src", "test.py"))) + require.Contains(t, generatedYaml, "notebook_path: "+filepath.Join("..", "src", "test.py")) require.Contains(t, generatedYaml, "task_key: test") require.Contains(t, generatedYaml, "new_cluster:") require.Contains(t, generatedYaml, "spark_version: 13.3.x-scala2.12") diff --git a/integration/bundle/generate_pipeline_test.go b/integration/bundle/generate_pipeline_test.go index 7843ec0c3..3565ab928 100644 --- a/integration/bundle/generate_pipeline_test.go +++ b/integration/bundle/generate_pipeline_test.go @@ -2,7 +2,6 @@ package bundle_test import ( "context" - "fmt" "os" "path" "path/filepath" @@ -36,7 +35,7 @@ func TestGenerateFromExistingPipelineAndDeploy(t *testing.T) { ctx = env.Set(ctx, "BUNDLE_ROOT", bundleRoot) c := testcli.NewRunner(t, ctx, "bundle", "generate", "pipeline", - "--existing-pipeline-id", fmt.Sprint(pipelineId), + "--existing-pipeline-id", pipelineId, "--config-dir", filepath.Join(bundleRoot, "resources"), "--source-dir", filepath.Join(bundleRoot, "src")) _, _, err := c.Run() @@ -65,9 +64,9 @@ func TestGenerateFromExistingPipelineAndDeploy(t *testing.T) { require.Contains(t, generatedYaml, "libraries:") require.Contains(t, generatedYaml, "- notebook:") - require.Contains(t, generatedYaml, fmt.Sprintf("path: %s", filepath.Join("..", "src", "notebook.py"))) + require.Contains(t, generatedYaml, "path: "+filepath.Join("..", "src", "notebook.py")) require.Contains(t, generatedYaml, "- file:") - require.Contains(t, generatedYaml, fmt.Sprintf("path: %s", filepath.Join("..", "src", "test.py"))) + require.Contains(t, generatedYaml, "path: "+filepath.Join("..", "src", "test.py")) deployBundle(t, ctx, bundleRoot) diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index e884cd8c6..b4f9c9086 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -16,7 +16,6 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/folders" "github.com/databricks/cli/libs/template" @@ -40,10 +39,19 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles") ctx = cmdio.InContext(ctx, cmd) - out, err := filer.NewLocalClient(bundleRoot) + r := template.Resolver{ + TemplatePathOrUrl: templateRoot, + ConfigFile: configFilePath, + OutputDir: bundleRoot, + } + + tmpl, err := r.Resolve(ctx) require.NoError(t, err) - err = template.Materialize(ctx, configFilePath, os.DirFS(templateRoot), out) + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) require.NoError(t, err) + return bundleRoot } @@ -119,6 +127,17 @@ func runResource(t testutil.TestingT, ctx context.Context, path, key string) (st return stdout.String(), err } +func runResourceWithStderr(t testutil.TestingT, ctx context.Context, path, key string) (string, string) { + ctx = env.Set(ctx, "BUNDLE_ROOT", path) + ctx = cmdio.NewContext(ctx, cmdio.Default()) + + c := testcli.NewRunner(t, ctx, "bundle", "run", key) + stdout, stderr, err := c.Run() + require.NoError(t, err) + + 
return stdout.String(), stderr.String() +} + func runResourceWithParams(t testutil.TestingT, ctx context.Context, path, key string, params ...string) (string, error) { ctx = env.Set(ctx, "BUNDLE_ROOT", path) ctx = cmdio.NewContext(ctx, cmdio.Default()) diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index 9b65636e9..931660032 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/python/pythontest" + "github.com/databricks/cli/libs/testdiff" "github.com/stretchr/testify/require" ) @@ -50,14 +51,17 @@ func testDefaultPython(t *testing.T, pythonVersion string) { ctx, wt := acc.WorkspaceTest(t) uniqueProjectId := testutil.RandomName("") - ctx, replacements := testcli.WithReplacementsMap(ctx) + ctx, replacements := testdiff.WithReplacementsMap(ctx) replacements.Set(uniqueProjectId, "$UNIQUE_PRJ") user, err := wt.W.CurrentUser.Me(ctx) require.NoError(t, err) require.NotNil(t, user) - testcli.PrepareReplacementsUser(t, replacements, *user) - testcli.PrepareReplacements(t, replacements, wt.W) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) tmpDir := t.TempDir() testutil.Chdir(t, tmpDir) diff --git a/integration/bundle/init_test.go b/integration/bundle/init_test.go index f5c263ca3..87a3e30e5 100644 --- a/integration/bundle/init_test.go +++ b/integration/bundle/init_test.go @@ -66,7 +66,7 @@ func TestBundleInitOnMlopsStacks(t *testing.T) { // Assert that the README.md file was created contents := testutil.ReadFile(t, filepath.Join(tmpDir2, "repo_name", projectName, "README.md")) - assert.Contains(t, contents, fmt.Sprintf("# %s", projectName)) + assert.Contains(t, contents, "# "+projectName) // Validate the stack testutil.Chdir(t, filepath.Join(tmpDir2, "repo_name", projectName)) diff --git a/integration/bundle/main_test.go b/integration/bundle/main_test.go deleted file mode 100644 index 1c44d0aaf..000000000 --- a/integration/bundle/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package bundle_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt new file mode 100644 index 000000000..b077f327d --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -0,0 +1,5 @@ +Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ/files... +Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning. +Deploying resources... +Updating deployment state... +Deployment complete! 
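The golden files added here (bundle_deploy.txt above, bundle_validate.txt just below) are compared against CLI output after run-specific values are normalized. The pattern, using the helpers that appear in the app and default-python tests in this change, looks roughly like this:

ctx, replacements := testdiff.WithReplacementsMap(ctx)
replacements.Set(uniqueId, "$UNIQUE_PRJ") // map run-specific IDs to stable tokens

user, err := wt.W.CurrentUser.Me(ctx)
require.NoError(t, err)
testdiff.PrepareReplacementsUser(t, replacements, *user)           // e.g. $USERNAME
testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) // e.g. $DATABRICKS_URL

// Compare normalized command output against the checked-in golden file.
testcli.AssertOutput(t, ctx,
	[]string{"bundle", "validate"},
	testutil.TestData("testdata/apps/bundle_validate.txt"))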
diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt new file mode 100644 index 000000000..dc9016a0f --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ + +Validation OK! diff --git a/integration/bundle/testdata/default_python/bundle_init.txt b/integration/bundle/testdata/default_python/bundle_init.txt index 6cfc32f98..c2917ea4e 100644 --- a/integration/bundle/testdata/default_python/bundle_init.txt +++ b/integration/bundle/testdata/default_python/bundle_init.txt @@ -1,6 +1,6 @@ Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): https://$DATABRICKS_HOST +Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): $DATABRICKS_URL ✨ Your new project has been created in the 'project_name_$UNIQUE_PRJ' directory! diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index 3143d729c..318cd2543 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -15,14 +15,15 @@ "lock": { "enabled": false } - } + }, + "uuid": "" }, "include": [ "resources/project_name_$UNIQUE_PRJ.job.yml", "resources/project_name_$UNIQUE_PRJ.pipeline.yml" ], "workspace": { - "host": "https://$DATABRICKS_HOST", + "host": "$DATABRICKS_URL", "current_user": { "active": true, "displayName": "$USERNAME", @@ -140,7 +141,7 @@ "unit": "DAYS" } }, - "url": "https://$DATABRICKS_HOST/jobs/?o=" + "url": "$DATABRICKS_URL/jobs/?o=" } }, "pipelines": { @@ -164,7 +165,7 @@ ], "name": "[dev $USERNAME] project_name_$UNIQUE_PRJ_pipeline", "target": "project_name_$UNIQUE_PRJ_dev", - "url": "https://$DATABRICKS_HOST/pipelines/?o=" + "url": "$DATABRICKS_URL/pipelines/?o=" } } }, diff --git a/integration/bundle/testdata/default_python/bundle_validate.txt b/integration/bundle/testdata/default_python/bundle_validate.txt index 88a5fdd18..578fd6494 100644 --- a/integration/bundle/testdata/default_python/bundle_validate.txt +++ b/integration/bundle/testdata/default_python/bundle_validate.txt @@ -1,7 +1,7 @@ Name: project_name_$UNIQUE_PRJ Target: dev Workspace: - Host: https://$DATABRICKS_HOST + Host: $DATABRICKS_URL User: $USERNAME Path: /Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev diff --git a/integration/cmd/alerts/main_test.go b/integration/cmd/alerts/main_test.go deleted file mode 100644 index 6987ade02..000000000 --- a/integration/cmd/alerts/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package alerts_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/api/main_test.go b/integration/cmd/api/main_test.go deleted file mode 100644 index 70d021790..000000000 --- a/integration/cmd/api/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package api_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/auth/describe_test.go b/integration/cmd/auth/describe_test.go index 41288dce6..f592bc276 100644 --- a/integration/cmd/auth/describe_test.go +++ b/integration/cmd/auth/describe_test.go @@ -2,7 +2,6 @@ package auth_test import ( "context" - "fmt" "testing" "github.com/databricks/cli/internal/testcli" @@ -21,14 +20,14 @@ func TestAuthDescribeSuccess(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, outStr) - require.Contains(t, outStr, fmt.Sprintf("Host: %s", w.Config.Host)) + require.Contains(t, outStr, "Host: "+w.Config.Host) me, err := w.CurrentUser.Me(context.Background()) require.NoError(t, err) - require.Contains(t, outStr, fmt.Sprintf("User: %s", me.UserName)) - require.Contains(t, outStr, fmt.Sprintf("Authenticated with: %s", w.Config.AuthType)) + require.Contains(t, outStr, "User: "+me.UserName) + require.Contains(t, outStr, "Authenticated with: "+w.Config.AuthType) require.Contains(t, outStr, "Current configuration:") - require.Contains(t, outStr, fmt.Sprintf("✓ host: %s", w.Config.Host)) + require.Contains(t, outStr, "✓ host: "+w.Config.Host) require.Contains(t, outStr, "✓ profile: default") } @@ -47,6 +46,6 @@ func TestAuthDescribeFailure(t *testing.T) { w, err := databricks.NewWorkspaceClient(&databricks.Config{}) require.NoError(t, err) - require.Contains(t, outStr, fmt.Sprintf("✓ host: %s", w.Config.Host)) + require.Contains(t, outStr, "✓ host: "+w.Config.Host) require.Contains(t, outStr, "✓ profile: nonexistent (from --profile flag)") } diff --git a/integration/cmd/auth/main_test.go b/integration/cmd/auth/main_test.go deleted file mode 100644 index 97b1d740b..000000000 --- a/integration/cmd/auth/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package auth_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/clusters/main_test.go b/integration/cmd/clusters/main_test.go deleted file mode 100644 index ccd5660e7..000000000 --- a/integration/cmd/clusters/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package clusters_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
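The per-package main_test.go files deleted throughout this change each contained only the three-line TestMain shown around this hunk, forwarding to internal.Main. For reference, TestMain is the standard testing hook that replaces the default runner for a package; the CLOUD_ENV check below is a hypothetical stand-in for whatever prerequisites internal.Main actually enforces, not a description of it (imports: fmt, os, testing):

// TestMain runs once per test binary and decides whether to run the package's tests.
func TestMain(m *testing.M) {
	// Hypothetical gate: integration tests need a configured environment.
	if os.Getenv("CLOUD_ENV") == "" {
		fmt.Println("skipping integration tests: CLOUD_ENV not set")
		return
	}
	os.Exit(m.Run())
}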
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/fs/cat_test.go b/integration/cmd/fs/cat_test.go index 3e964fe6e..14ec8140e 100644 --- a/integration/cmd/fs/cat_test.go +++ b/integration/cmd/fs/cat_test.go @@ -18,13 +18,11 @@ func TestFsCat(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -40,13 +38,11 @@ func TestFsCatOnADir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "dir1") require.NoError(t, err) @@ -61,13 +57,11 @@ func TestFsCatOnNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cat", path.Join(tmpDir, "non-existent-file")) assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/completion_test.go b/integration/cmd/fs/completion_test.go index 88ce2fcc1..b13bf9d60 100644 --- a/integration/cmd/fs/completion_test.go +++ b/integration/cmd/fs/completion_test.go @@ -2,7 +2,6 @@ package fs_test import ( "context" - "fmt" "strings" "testing" @@ -24,6 +23,6 @@ func TestFsCompletion(t *testing.T) { setupCompletionFile(t, f) stdout, _ := testcli.RequireSuccessfulRun(t, ctx, "__complete", "fs", "ls", tmpDir+"/") - expectedOutput := fmt.Sprintf("%s/dir1/\n:2\n", tmpDir) + expectedOutput := tmpDir + "/dir1/\n:2\n" assert.Equal(t, expectedOutput, stdout.String()) } diff --git a/integration/cmd/fs/cp_test.go b/integration/cmd/fs/cp_test.go index 76aef7acf..6d0266555 100644 --- a/integration/cmd/fs/cp_test.go +++ b/integration/cmd/fs/cp_test.go @@ -126,14 +126,12 @@ func TestFsCpDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", sourceDir, targetDir, "--recursive") @@ -147,14 +145,12 @@ func TestFsCpFileToFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), path.Join(targetDir, "bar.txt")) @@ -168,14 +164,12 @@ 
func TestFsCpFileToDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), targetDir) @@ -205,14 +199,12 @@ func TestFsCpDirToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -231,14 +223,12 @@ func TestFsCpFileToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -255,14 +245,12 @@ func TestFsCpFileToFileFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -279,14 +267,12 @@ func TestFsCpDirToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -303,14 +289,12 @@ func TestFsCpFileToFileWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -327,14 +311,12 @@ func TestFsCpFileToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t 
*testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -351,13 +333,11 @@ func TestFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cp", path.Join(tmpDir), path.Join(tmpDir, "foobar")) r := regexp.MustCompile("source path .* is a directory. Please specify the --recursive flag") @@ -376,14 +356,12 @@ func TestFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target diff --git a/integration/cmd/fs/ls_test.go b/integration/cmd/fs/ls_test.go index 58e776d8a..0f53193bf 100644 --- a/integration/cmd/fs/ls_test.go +++ b/integration/cmd/fs/ls_test.go @@ -5,7 +5,6 @@ import ( "encoding/json" "io/fs" "path" - "regexp" "strings" "testing" @@ -44,13 +43,11 @@ func TestFsLs(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") @@ -65,11 +62,11 @@ func TestFsLs(t *testing.T) { assert.Equal(t, "a", parsedStdout[0]["name"]) assert.Equal(t, true, parsedStdout[0]["is_directory"]) - assert.Equal(t, float64(0), parsedStdout[0]["size"]) + assert.InDelta(t, float64(0), parsedStdout[0]["size"], 0.0001) assert.Equal(t, "bye.txt", parsedStdout[1]["name"]) assert.Equal(t, false, parsedStdout[1]["is_directory"]) - assert.Equal(t, float64(3), parsedStdout[1]["size"]) + assert.InDelta(t, float64(3), parsedStdout[1]["size"], 0.0001) }) } } @@ -78,13 +75,11 @@ func TestFsLsWithAbsolutePaths(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json", "--absolute") @@ -99,11 +94,11 @@ func TestFsLsWithAbsolutePaths(t *testing.T) { assert.Equal(t, path.Join(tmpDir, "a"), parsedStdout[0]["name"]) assert.Equal(t, true, parsedStdout[0]["is_directory"]) - assert.Equal(t, float64(0), parsedStdout[0]["size"]) + assert.InDelta(t, float64(0), parsedStdout[0]["size"], 0.0001) assert.Equal(t, path.Join(tmpDir, "bye.txt"), parsedStdout[1]["name"]) assert.Equal(t, false, 
parsedStdout[1]["is_directory"]) - assert.Equal(t, float64(3), parsedStdout[1]["size"]) + assert.InDelta(t, float64(3), parsedStdout[1]["size"].(float64), 0.0001) }) } } @@ -112,17 +107,15 @@ func TestFsLsOnFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "a", "hello.txt"), "--output=json") - assert.Regexp(t, regexp.MustCompile("not a directory: .*/a/hello.txt"), err.Error()) + assert.Regexp(t, "not a directory: .*/a/hello.txt", err.Error()) assert.ErrorAs(t, err, &filer.NotADirectory{}) }) } @@ -132,13 +125,11 @@ func TestFsLsOnEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") assert.Equal(t, "", stderr.String()) @@ -147,7 +138,7 @@ func TestFsLsOnEmptyDir(t *testing.T) { require.NoError(t, err) // assert on ls output - assert.Equal(t, 0, len(parsedStdout)) + assert.Empty(t, parsedStdout) }) } } @@ -156,17 +147,15 @@ func TestFsLsForNonexistingDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "nonexistent"), "--output=json") assert.ErrorIs(t, err, fs.ErrNotExist) - assert.Regexp(t, regexp.MustCompile("no such directory: .*/nonexistent"), err.Error()) + assert.Regexp(t, "no such directory: .*/nonexistent", err.Error()) }) } } diff --git a/integration/cmd/fs/main_test.go b/integration/cmd/fs/main_test.go deleted file mode 100644 index b9402f0b2..000000000 --- a/integration/cmd/fs/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package fs_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/fs/mkdir_test.go b/integration/cmd/fs/mkdir_test.go index f332bb526..5cea0599c 100644 --- a/integration/cmd/fs/mkdir_test.go +++ b/integration/cmd/fs/mkdir_test.go @@ -17,13 +17,11 @@ func TestFsMkdir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a")) @@ -34,7 +32,7 @@ func TestFsMkdir(t *testing.T) { info, err := f.Stat(context.Background(), "a") require.NoError(t, err) assert.Equal(t, "a", info.Name()) - assert.Equal(t, true, info.IsDir()) + assert.True(t, info.IsDir()) }) } } @@ -43,13 +41,11 @@ func TestFsMkdirCreatesIntermediateDirectories(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a/b/c" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a", "b", "c")) @@ -60,19 +56,19 @@ func TestFsMkdirCreatesIntermediateDirectories(t *testing.T) { infoA, err := f.Stat(context.Background(), "a") require.NoError(t, err) assert.Equal(t, "a", infoA.Name()) - assert.Equal(t, true, infoA.IsDir()) + assert.True(t, infoA.IsDir()) // assert directory "b" is created infoB, err := f.Stat(context.Background(), "a/b") require.NoError(t, err) assert.Equal(t, "b", infoB.Name()) - assert.Equal(t, true, infoB.IsDir()) + assert.True(t, infoB.IsDir()) // assert directory "c" is created infoC, err := f.Stat(context.Background(), "a/b/c") require.NoError(t, err) assert.Equal(t, "c", infoC.Name()) - assert.Equal(t, true, infoC.IsDir()) + assert.True(t, infoC.IsDir()) }) } } @@ -81,13 +77,11 @@ func TestFsMkdirWhenDirectoryAlreadyExists(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" err := f.Mkdir(context.Background(), "a") diff --git a/integration/cmd/fs/rm_test.go b/integration/cmd/fs/rm_test.go index 018c7920e..fc19bb5b5 100644 --- a/integration/cmd/fs/rm_test.go +++ b/integration/cmd/fs/rm_test.go @@ -17,14 +17,12 @@ func TestFsRmFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a file ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -48,14 +46,12 @@ func TestFsRmEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -79,14 
+75,12 @@ func TestFsRmNonEmptyDirectory(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -110,13 +104,11 @@ func TestFsRmForNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) // Expect error if file does not exist _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "rm", path.Join(tmpDir, "does-not-exist")) @@ -129,13 +121,11 @@ func TestFsRmDirRecursively(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // Create a directory err := f.Mkdir(context.Background(), "a") diff --git a/integration/cmd/jobs/jobs_test.go b/integration/cmd/jobs/jobs_test.go index b6bcfc5b3..7ebc135a3 100644 --- a/integration/cmd/jobs/jobs_test.go +++ b/integration/cmd/jobs/jobs_test.go @@ -3,7 +3,7 @@ package jobs_test import ( "context" "encoding/json" - "fmt" + "strconv" "testing" "github.com/databricks/cli/internal/testcli" @@ -20,5 +20,5 @@ func TestCreateJob(t *testing.T) { var output map[string]int err := json.Unmarshal(stdout.Bytes(), &output) require.NoError(t, err) - testcli.RequireSuccessfulRun(t, ctx, "jobs", "delete", fmt.Sprint(output["job_id"]), "--log-level=debug") + testcli.RequireSuccessfulRun(t, ctx, "jobs", "delete", strconv.Itoa(output["job_id"]), "--log-level=debug") } diff --git a/integration/cmd/jobs/main_test.go b/integration/cmd/jobs/main_test.go deleted file mode 100644 index 46369a526..000000000 --- a/integration/cmd/jobs/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package jobs_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/main_test.go b/integration/cmd/main_test.go deleted file mode 100644 index a1a5586b6..000000000 --- a/integration/cmd/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package cmd_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/repos/main_test.go b/integration/cmd/repos/main_test.go deleted file mode 100644 index 7eaa174bc..000000000 --- a/integration/cmd/repos/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package repos_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/secrets/main_test.go b/integration/cmd/secrets/main_test.go deleted file mode 100644 index a44d30671..000000000 --- a/integration/cmd/secrets/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package secrets_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/storage_credentials/main_test.go b/integration/cmd/storage_credentials/main_test.go deleted file mode 100644 index 14d00d966..000000000 --- a/integration/cmd/storage_credentials/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package storage_credentials_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/sync/main_test.go b/integration/cmd/sync/main_test.go deleted file mode 100644 index 8d9f3ca25..000000000 --- a/integration/cmd/sync/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package sync_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/sync/sync_test.go b/integration/cmd/sync/sync_test.go index 984f6ac49..632497054 100644 --- a/integration/cmd/sync/sync_test.go +++ b/integration/cmd/sync/sync_test.go @@ -151,10 +151,7 @@ func (a *syncTest) remoteFileContent(ctx context.Context, relativePath, expected filePath := path.Join(a.remoteRoot, relativePath) // Remove leading "/" so we can use it in the URL. - urlPath := fmt.Sprintf( - "/api/2.0/workspace-files/%s", - strings.TrimLeft(filePath, "/"), - ) + urlPath := "/api/2.0/workspace-files/" + strings.TrimLeft(filePath, "/") apiClient, err := client.New(a.w.Config) require.NoError(a.t, err) @@ -225,7 +222,7 @@ func (a *syncTest) snapshotContains(files []string) { assert.Equal(a.t, s.RemotePath, a.remoteRoot) for _, filePath := range files { _, ok := s.LastModifiedTimes[filePath] - assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastModifiedTimes)) + assert.True(a.t, ok, "%s not in snapshot file: %v", filePath, s.LastModifiedTimes) } assert.Equal(a.t, len(files), len(s.LastModifiedTimes)) } diff --git a/integration/cmd/version/main_test.go b/integration/cmd/version/main_test.go deleted file mode 100644 index 4aa5e046a..000000000 --- a/integration/cmd/version/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package version_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/workspace/main_test.go b/integration/cmd/workspace/main_test.go deleted file mode 100644 index 40d140eac..000000000 --- a/integration/cmd/workspace/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package workspace_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/cmd/workspace/workspace_test.go b/integration/cmd/workspace/workspace_test.go index 4edbbfc83..c376a87d2 100644 --- a/integration/cmd/workspace/workspace_test.go +++ b/integration/cmd/workspace/workspace_test.go @@ -114,7 +114,7 @@ func TestExportDir(t *testing.T) { require.NoError(t, err) expectedLogs := strings.Join([]string{ - fmt.Sprintf("Exporting files from %s", sourceDir), + "Exporting files from " + sourceDir, fmt.Sprintf("%s -> %s", path.Join(sourceDir, "a/b/c/file-b"), filepath.Join(targetDir, "a/b/c/file-b")), fmt.Sprintf("%s -> %s", path.Join(sourceDir, "file-a"), filepath.Join(targetDir, "file-a")), fmt.Sprintf("%s -> %s", path.Join(sourceDir, "pyNotebook"), filepath.Join(targetDir, "pyNotebook.py")), @@ -185,7 +185,7 @@ func TestImportDir(t *testing.T) { stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "workspace", "import-dir", "./testdata/import_dir", targetDir, "--log-level=debug") expectedLogs := strings.Join([]string{ - fmt.Sprintf("Importing files from %s", "./testdata/import_dir"), + "Importing files from " + "./testdata/import_dir", fmt.Sprintf("%s -> %s", filepath.FromSlash("a/b/c/file-b"), path.Join(targetDir, "a/b/c/file-b")), fmt.Sprintf("%s -> %s", filepath.FromSlash("file-a"), path.Join(targetDir, "file-a")), fmt.Sprintf("%s -> %s", filepath.FromSlash("jupyterNotebook.ipynb"), path.Join(targetDir, "jupyterNotebook")), diff --git a/integration/enforce_convention_test.go b/integration/enforce_convention_test.go deleted file mode 100644 index cc822a6a3..000000000 --- a/integration/enforce_convention_test.go +++ /dev/null @@ -1,116 +0,0 @@ -package integration - -import ( - "go/parser" - "go/token" - "os" - "path/filepath" - "strings" - "testing" - "text/template" - - "golang.org/x/exp/maps" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -type packageInfo struct { - Name string - Files []string -} - -func enumeratePackages(t *testing.T) map[string]packageInfo { - pkgmap := make(map[string]packageInfo) - err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - // Skip files. - if !info.IsDir() { - return nil - } - - // Skip the root directory and the "internal" directory. - if path == "." || strings.HasPrefix(path, "internal") { - return nil - } - - fset := token.NewFileSet() - pkgs, err := parser.ParseDir(fset, path, nil, parser.ParseComments) - require.NoError(t, err) - if len(pkgs) == 0 { - return nil - } - - // Expect one package per directory. - require.Len(t, pkgs, 1, "Directory %s contains more than one package", path) - v := maps.Values(pkgs)[0] - - // Record the package. - pkgmap[path] = packageInfo{ - Name: v.Name, - Files: maps.Keys(v.Files), - } - return nil - }) - require.NoError(t, err) - return pkgmap -} - -// TestEnforcePackageNames checks that all integration test package names use the "_test" suffix. -// We enforce this package name to avoid package name aliasing. 
-func TestEnforcePackageNames(t *testing.T) { - pkgmap := enumeratePackages(t) - for _, pkg := range pkgmap { - assert.True(t, strings.HasSuffix(pkg.Name, "_test"), "Package name %s does not end with _test", pkg.Name) - } -} - -var mainTestTemplate = template.Must(template.New("main_test").Parse( - `package {{.Name}} - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} -`)) - -func TestEnforceMainTest(t *testing.T) { - pkgmap := enumeratePackages(t) - for dir, pkg := range pkgmap { - found := false - for _, file := range pkg.Files { - if filepath.Base(file) == "main_test.go" { - found = true - break - } - } - - // Expect a "main_test.go" file in each package. - assert.True(t, found, "Directory %s does not contain a main_test.go file", dir) - } -} - -func TestWriteMainTest(t *testing.T) { - t.Skip("Uncomment to write main_test.go files") - - pkgmap := enumeratePackages(t) - for dir, pkg := range pkgmap { - // Write a "main_test.go" file to the package. - // This file is required to run the integration tests. - f, err := os.Create(filepath.Join(dir, "main_test.go")) - require.NoError(t, err) - defer f.Close() - err = mainTestTemplate.Execute(f, pkg) - require.NoError(t, err) - } -} diff --git a/integration/internal/acc/debug.go b/integration/internal/acc/debug.go index b4939881e..08e385b09 100644 --- a/integration/internal/acc/debug.go +++ b/integration/internal/acc/debug.go @@ -11,14 +11,14 @@ import ( ) // Detects if test is run from "debug test" feature in VS Code. -func isInDebug() bool { +func IsInDebug() bool { ex, _ := os.Executable() return strings.HasPrefix(path.Base(ex), "__debug_bin") } // Loads debug environment from ~/.databricks/debug-env.json. func loadDebugEnvIfRunFromIDE(t testutil.TestingT, key string) { - if !isInDebug() { + if !IsInDebug() { return } home, err := os.UserHomeDir() diff --git a/integration/internal/acc/fixtures.go b/integration/internal/acc/fixtures.go index cd867fb3a..2367d228f 100644 --- a/integration/internal/acc/fixtures.go +++ b/integration/internal/acc/fixtures.go @@ -45,7 +45,7 @@ func TemporaryDbfsDir(t *WorkspaceT, name ...string) string { // Prefix the name with "integration-test-" to make it easier to identify. name = append([]string{"integration-test-"}, name...) - path := fmt.Sprintf("/tmp/%s", testutil.RandomName(name...)) + path := "/tmp/" + testutil.RandomName(name...) t.Logf("Creating DBFS directory %s", path) err := t.W.Dbfs.MkdirsByPath(ctx, path) diff --git a/integration/internal/acc/workspace.go b/integration/internal/acc/workspace.go index 2f8a5b8e7..64deda7c1 100644 --- a/integration/internal/acc/workspace.go +++ b/integration/internal/acc/workspace.go @@ -21,9 +21,10 @@ type WorkspaceT struct { } func WorkspaceTest(t testutil.TestingT) (context.Context, *WorkspaceT) { + t.Helper() loadDebugEnvIfRunFromIDE(t, "workspace") - t.Log(testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")) + t.Logf("CLOUD_ENV=%s", testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")) w, err := databricks.NewWorkspaceClient() require.NoError(t, err) @@ -41,9 +42,10 @@ func WorkspaceTest(t testutil.TestingT) (context.Context, *WorkspaceT) { // Run the workspace test only on UC workspaces. 
func UcWorkspaceTest(t testutil.TestingT) (context.Context, *WorkspaceT) { + t.Helper() loadDebugEnvIfRunFromIDE(t, "workspace") - t.Log(testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")) + t.Logf("CLOUD_ENV=%s", testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")) if os.Getenv("TEST_METASTORE_ID") == "" { t.Skipf("Skipping on non-UC workspaces") @@ -67,19 +69,21 @@ func UcWorkspaceTest(t testutil.TestingT) (context.Context, *WorkspaceT) { } func (t *WorkspaceT) TestClusterID() string { + t.Helper() clusterID := testutil.GetEnvOrSkipTest(t, "TEST_BRICKS_CLUSTER_ID") err := t.W.Clusters.EnsureClusterIsRunning(t.ctx, clusterID) - require.NoError(t, err) + require.NoError(t, err, "Unexpected error from EnsureClusterIsRunning for clusterID=%s", clusterID) return clusterID } func (t *WorkspaceT) RunPython(code string) (string, error) { + t.Helper() var err error // Create command executor only once per test. if t.exec == nil { t.exec, err = t.W.CommandExecution.Start(t.ctx, t.TestClusterID(), compute.LanguagePython) - require.NoError(t, err) + require.NoError(t, err, "Unexpected error from CommandExecution.Start(clusterID=%v)", t.TestClusterID()) t.Cleanup(func() { err := t.exec.Destroy(t.ctx) @@ -88,7 +92,7 @@ func (t *WorkspaceT) RunPython(code string) (string, error) { } results, err := t.exec.Execute(t.ctx, code) - require.NoError(t, err) + require.NoError(t, err, "Unexpected error from Execute(%v)", code) require.NotEqual(t, compute.ResultTypeError, results.ResultType, results.Cause) output, ok := results.Data.(string) require.True(t, ok, "unexpected type %T", results.Data) diff --git a/integration/internal/main.go b/integration/internal/main.go deleted file mode 100644 index 6d69dcf70..000000000 --- a/integration/internal/main.go +++ /dev/null @@ -1,20 +0,0 @@ -package internal - -import ( - "fmt" - "os" - "testing" -) - -// Main is the entry point for integration tests. -// We use this for all integration tests defined in this subtree to ensure -// they are not inadvertently executed when calling `go test ./...`. -func Main(m *testing.M) { - value := os.Getenv("CLOUD_ENV") - if value == "" { - fmt.Println("CLOUD_ENV is not set, skipping integration tests") - return - } - - m.Run() -} diff --git a/integration/libs/filer/filer_test.go b/integration/libs/filer/filer_test.go index 766f9817b..bc1713b30 100644 --- a/integration/libs/filer/filer_test.go +++ b/integration/libs/filer/filer_test.go @@ -4,11 +4,9 @@ import ( "bytes" "context" "encoding/json" - "errors" "io" "io/fs" "path" - "regexp" "strings" "testing" @@ -106,7 +104,7 @@ func commonFilerRecursiveDeleteTest(t *testing.T, ctx context.Context, f filer.F for _, e := range entriesBeforeDelete { names = append(names, e.Name()) } - assert.Equal(t, names, []string{"file1", "file2", "subdir1", "subdir2"}) + assert.Equal(t, []string{"file1", "file2", "subdir1", "subdir2"}, names) err = f.Delete(ctx, "dir") assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{}) @@ -130,11 +128,9 @@ func TestFilerRecursiveDelete(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -149,13 +145,13 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) // Write should fail because the intermediate directory doesn't exist. 
err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`)) - assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{})) - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorAs(t, err, &filer.NoSuchDirectoryError{}) + assert.ErrorIs(t, err, fs.ErrNotExist) // Read should fail because the intermediate directory doesn't yet exist. _, err = f.Read(ctx, "/foo/bar") - assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorAs(t, err, &filer.FileDoesNotExistError{}) + assert.ErrorIs(t, err, fs.ErrNotExist) // Read should fail because the path points to a directory err = f.Mkdir(ctx, "/dir") @@ -170,8 +166,8 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) // Write should fail because there is an existing file at the specified path. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`)) - assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{})) - assert.True(t, errors.Is(err, fs.ErrExist)) + assert.ErrorAs(t, err, &filer.FileAlreadyExistsError{}) + assert.ErrorIs(t, err, fs.ErrExist) // Write with OverwriteIfExists should succeed. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists) @@ -188,7 +184,7 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) require.NoError(t, err) assert.Equal(t, "foo", info.Name()) assert.True(t, info.Mode().IsDir()) - assert.Equal(t, true, info.IsDir()) + assert.True(t, info.IsDir()) // Stat on a file should succeed. // Note: size and modification time behave differently between backends. @@ -196,17 +192,17 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) require.NoError(t, err) assert.Equal(t, "bar", info.Name()) assert.True(t, info.Mode().IsRegular()) - assert.Equal(t, false, info.IsDir()) + assert.False(t, info.IsDir()) // Delete should fail if the file doesn't exist. err = f.Delete(ctx, "/doesnt_exist") assert.ErrorAs(t, err, &filer.FileDoesNotExistError{}) - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // Stat should fail if the file doesn't exist. _, err = f.Stat(ctx, "/doesnt_exist") assert.ErrorAs(t, err, &filer.FileDoesNotExistError{}) - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // Delete should succeed for file that does exist. err = f.Delete(ctx, "/foo/bar") @@ -215,7 +211,7 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) // Delete should fail for a non-empty directory. err = f.Delete(ctx, "/foo") assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{}) - assert.True(t, errors.Is(err, fs.ErrInvalid)) + assert.ErrorIs(t, err, fs.ErrInvalid) // Delete should succeed for a non-empty directory if the DeleteRecursively flag is set. err = f.Delete(ctx, "/foo", filer.DeleteRecursively) @@ -224,8 +220,8 @@ func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) // Delete of the filer root should ALWAYS fail, otherwise subsequent writes would fail. // It is not in the filer's purview to delete its root directory. 
err = f.Delete(ctx, "/") - assert.True(t, errors.As(err, &filer.CannotDeleteRootError{})) - assert.True(t, errors.Is(err, fs.ErrInvalid)) + assert.ErrorAs(t, err, &filer.CannotDeleteRootError{}) + assert.ErrorIs(t, err, fs.ErrInvalid) } func TestFilerReadWrite(t *testing.T) { @@ -241,11 +237,9 @@ func TestFilerReadWrite(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -262,7 +256,7 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { // We start with an empty directory. entries, err := f.ReadDir(ctx, ".") require.NoError(t, err) - assert.Len(t, entries, 0) + assert.Empty(t, entries) // Write a file. err = f.Write(ctx, "/hello.txt", strings.NewReader(`hello world`)) @@ -282,8 +276,8 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { // Expect an error if the path doesn't exist. _, err = f.ReadDir(ctx, "/dir/a/b/c/d/e") - assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}), err) - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorAs(t, err, &filer.NoSuchDirectoryError{}, err) + assert.ErrorIs(t, err, fs.ErrNotExist) // Expect two entries in the root. entries, err = f.ReadDir(ctx, ".") @@ -295,7 +289,7 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.False(t, entries[1].IsDir()) info, err = entries[1].Info() require.NoError(t, err) - assert.Greater(t, info.ModTime().Unix(), int64(0)) + assert.Positive(t, info.ModTime().Unix()) // Expect two entries in the directory. entries, err = f.ReadDir(ctx, "/dir") @@ -307,7 +301,7 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.False(t, entries[1].IsDir()) info, err = entries[1].Info() require.NoError(t, err) - assert.Greater(t, info.ModTime().Unix(), int64(0)) + assert.Positive(t, info.ModTime().Unix()) // Expect a single entry in the nested path. 
entries, err = f.ReadDir(ctx, "/dir/a/b") @@ -325,7 +319,7 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { require.NoError(t, err) entries, err = f.ReadDir(ctx, "empty-dir") assert.NoError(t, err) - assert.Len(t, entries, 0) + assert.Empty(t, entries) // Expect one entry for a directory with a file in it err = f.Write(ctx, "dir-with-one-file/my-file.txt", strings.NewReader("abc"), filer.CreateParentDirectories) @@ -333,7 +327,7 @@ func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { entries, err = f.ReadDir(ctx, "dir-with-one-file") assert.NoError(t, err) assert.Len(t, entries, 1) - assert.Equal(t, entries[0].Name(), "my-file.txt") + assert.Equal(t, "my-file.txt", entries[0].Name()) assert.False(t, entries[0].IsDir()) } @@ -350,11 +344,9 @@ func TestFilerReadDir(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() commonFilerReadDirTest(t, ctx, f) @@ -459,7 +451,7 @@ func TestFilerWorkspaceNotebook(t *testing.T) { // Assert uploading a second time fails due to overwrite mode missing err = f.Write(ctx, tc.name, strings.NewReader(tc.content2)) require.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error()) + assert.Regexp(t, `file already exists: .*/`+tc.nameWithoutExt+`$`, err.Error()) // Try uploading the notebook again with overwrite flag. This time it should succeed. err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists) diff --git a/integration/libs/filer/main_test.go b/integration/libs/filer/main_test.go deleted file mode 100644 index ca866d952..000000000 --- a/integration/libs/filer/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package filer_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/libs/git/main_test.go b/integration/libs/git/main_test.go deleted file mode 100644 index 5d68e0851..000000000 --- a/integration/libs/git/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package git_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
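The filer assertions above replace `assert.True(t, errors.As(...))` and `assert.True(t, errors.Is(...))` with `assert.ErrorAs` and `assert.ErrorIs`. The checks are equivalent, but on failure testify can report the error and the target instead of a bare "should be true". A minimal sketch, assuming testify is available; the error type here is illustrative, standing in for the filer error types:

package filer_example

import (
	"errors"
	"fmt"
	"io/fs"
	"testing"

	"github.com/stretchr/testify/assert"
)

// notFoundError stands in for an error type such as filer.FileDoesNotExistError.
type notFoundError struct{ path string }

func (e notFoundError) Error() string { return "no such file: " + e.path }
func (e notFoundError) Unwrap() error { return fs.ErrNotExist }

func TestErrorAssertions(t *testing.T) {
	err := fmt.Errorf("read failed: %w", notFoundError{path: "/foo/bar"})

	// Before: boolean assertions; a failure only says the value should be true.
	assert.True(t, errors.As(err, &notFoundError{}))
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// After: dedicated assertions with the same semantics and richer failure output.
	assert.ErrorAs(t, err, &notFoundError{})
	assert.ErrorIs(t, err, fs.ErrNotExist)
}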
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/libs/locker/locker_test.go b/integration/libs/locker/locker_test.go index c51972b90..93cb1ffce 100644 --- a/integration/libs/locker/locker_test.go +++ b/integration/libs/locker/locker_test.go @@ -60,15 +60,14 @@ func TestLock(t *testing.T) { lockerErrs := make([]error, numConcurrentLocks) lockers := make([]*lockpkg.Locker, numConcurrentLocks) - for i := 0; i < numConcurrentLocks; i++ { + for i := range numConcurrentLocks { lockers[i], err = lockpkg.CreateLocker("humpty.dumpty@databricks.com", remoteProjectRoot, wsc) require.NoError(t, err) } var wg sync.WaitGroup - for i := 0; i < numConcurrentLocks; i++ { + for currentIndex := range numConcurrentLocks { wg.Add(1) - currentIndex := i go func() { defer wg.Done() time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) @@ -80,7 +79,7 @@ func TestLock(t *testing.T) { countActive := 0 indexOfActiveLocker := 0 indexOfAnInactiveLocker := -1 - for i := 0; i < numConcurrentLocks; i++ { + for i := range numConcurrentLocks { if lockers[i].Active { countActive += 1 assert.NoError(t, lockerErrs[i]) @@ -102,7 +101,7 @@ func TestLock(t *testing.T) { assert.True(t, remoteLocker.AcquisitionTime.Equal(lockers[indexOfActiveLocker].State.AcquisitionTime), "remote locker acquisition time does not match active locker") // test all other locks (inactive ones) do not match the remote lock and Unlock fails - for i := 0; i < numConcurrentLocks; i++ { + for i := range numConcurrentLocks { if i == indexOfActiveLocker { continue } @@ -112,7 +111,7 @@ func TestLock(t *testing.T) { } // test inactive locks fail to write a file - for i := 0; i < numConcurrentLocks; i++ { + for i := range numConcurrentLocks { if i == indexOfActiveLocker { continue } @@ -140,7 +139,7 @@ func TestLock(t *testing.T) { assert.Equal(t, "Shah Rukh", res["name"]) // inactive locker file reads fail - for i := 0; i < numConcurrentLocks; i++ { + for i := range numConcurrentLocks { if i == indexOfActiveLocker { continue } diff --git a/integration/libs/locker/main_test.go b/integration/libs/locker/main_test.go deleted file mode 100644 index 33a883768..000000000 --- a/integration/libs/locker/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package locker_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/libs/tags/main_test.go b/integration/libs/tags/main_test.go deleted file mode 100644 index 4eaf54a20..000000000 --- a/integration/libs/tags/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package tags_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. -func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/python/main_test.go b/integration/python/main_test.go deleted file mode 100644 index b35da21e1..000000000 --- a/integration/python/main_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package python_test - -import ( - "testing" - - "github.com/databricks/cli/integration/internal" -) - -// TestMain is the entrypoint executed by the test runner. -// See [internal.Main] for prerequisites for running integration tests. 
-func TestMain(m *testing.M) { - internal.Main(m) -} diff --git a/integration/python/python_tasks_test.go b/integration/python/python_tasks_test.go index 9ad3ed5de..39b38f890 100644 --- a/integration/python/python_tasks_test.go +++ b/integration/python/python_tasks_test.go @@ -5,7 +5,6 @@ import ( "context" "encoding/base64" "encoding/json" - "fmt" "os" "path" "slices" @@ -244,8 +243,8 @@ func prepareDBFSFiles(t *testing.T) *testFiles { return &testFiles{ w: w, pyNotebookPath: path.Join(baseDir, "test.py"), - sparkPythonPath: fmt.Sprintf("dbfs:%s", path.Join(baseDir, "spark.py")), - wheelPath: fmt.Sprintf("dbfs:%s", path.Join(baseDir, "my_test_code-0.0.1-py3-none-any.whl")), + sparkPythonPath: "dbfs:" + path.Join(baseDir, "spark.py"), + wheelPath: "dbfs:" + path.Join(baseDir, "my_test_code-0.0.1-py3-none-any.whl"), } } @@ -266,9 +265,9 @@ func prepareRepoFiles(t *testing.T) *testFiles { func GenerateNotebookTasks(notebookPath string, versions []string, nodeTypeId string) []jobs.SubmitTask { tasks := make([]jobs.SubmitTask, 0) - for i := 0; i < len(versions); i++ { + for i := range versions { task := jobs.SubmitTask{ - TaskKey: fmt.Sprintf("notebook_%s", strings.ReplaceAll(versions[i], ".", "_")), + TaskKey: "notebook_" + strings.ReplaceAll(versions[i], ".", "_"), NotebookTask: &jobs.NotebookTask{ NotebookPath: notebookPath, }, @@ -287,9 +286,9 @@ func GenerateNotebookTasks(notebookPath string, versions []string, nodeTypeId st func GenerateSparkPythonTasks(notebookPath string, versions []string, nodeTypeId string) []jobs.SubmitTask { tasks := make([]jobs.SubmitTask, 0) - for i := 0; i < len(versions); i++ { + for i := range versions { task := jobs.SubmitTask{ - TaskKey: fmt.Sprintf("spark_%s", strings.ReplaceAll(versions[i], ".", "_")), + TaskKey: "spark_" + strings.ReplaceAll(versions[i], ".", "_"), SparkPythonTask: &jobs.SparkPythonTask{ PythonFile: notebookPath, }, @@ -308,9 +307,9 @@ func GenerateSparkPythonTasks(notebookPath string, versions []string, nodeTypeId func GenerateWheelTasks(wheelPath string, versions []string, nodeTypeId string) []jobs.SubmitTask { tasks := make([]jobs.SubmitTask, 0) - for i := 0; i < len(versions); i++ { + for i := range versions { task := jobs.SubmitTask{ - TaskKey: fmt.Sprintf("whl_%s", strings.ReplaceAll(versions[i], ".", "_")), + TaskKey: "whl_" + strings.ReplaceAll(versions[i], ".", "_"), PythonWheelTask: &jobs.PythonWheelTask{ PackageName: "my_test_code", EntryPoint: "run", diff --git a/internal/testcli/golden.go b/internal/testcli/golden.go index 34f38f18a..669cc2f9b 100644 --- a/internal/testcli/golden.go +++ b/internal/testcli/golden.go @@ -3,222 +3,28 @@ package testcli import ( "context" "fmt" - "os" - "regexp" - "slices" - "strings" - "testing" "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/iamutil" "github.com/databricks/cli/libs/testdiff" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) -var OverwriteMode = os.Getenv("TESTS_OUTPUT") == "OVERWRITE" - -func ReadFile(t testutil.TestingT, ctx context.Context, filename string) string { - data, err := os.ReadFile(filename) - if os.IsNotExist(err) { - return "" - } - assert.NoError(t, err) - // On CI, on Windows \n in the file somehow end up as \r\n - return NormalizeNewlines(string(data)) -} - func captureOutput(t testutil.TestingT, ctx context.Context, args []string) string { - t.Logf("run args: [%s]", strings.Join(args, ", ")) + t.Helper() r := NewRunner(t, ctx, args...) 
stdout, stderr, err := r.Run() assert.NoError(t, err) - out := stderr.String() + stdout.String() - return ReplaceOutput(t, ctx, out) -} - -func WriteFile(t testutil.TestingT, filename, data string) { - t.Logf("Overwriting %s", filename) - err := os.WriteFile(filename, []byte(data), 0o644) - assert.NoError(t, err) + return stderr.String() + stdout.String() } func AssertOutput(t testutil.TestingT, ctx context.Context, args []string, expectedPath string) { - expected := ReadFile(t, ctx, expectedPath) - + t.Helper() out := captureOutput(t, ctx, args) - - if out != expected { - actual := fmt.Sprintf("Output from %v", args) - testdiff.AssertEqualTexts(t, expectedPath, actual, expected, out) - - if OverwriteMode { - WriteFile(t, expectedPath, out) - } - } + testdiff.AssertOutput(t, ctx, out, fmt.Sprintf("Output from %v", args), expectedPath) } func AssertOutputJQ(t testutil.TestingT, ctx context.Context, args []string, expectedPath string, ignorePaths []string) { - expected := ReadFile(t, ctx, expectedPath) - + t.Helper() out := captureOutput(t, ctx, args) - - if out != expected { - actual := fmt.Sprintf("Output from %v", args) - testdiff.AssertEqualJQ(t.(*testing.T), expectedPath, actual, expected, out, ignorePaths) - - if OverwriteMode { - WriteFile(t, expectedPath, out) - } - } -} - -var ( - uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) - numIdRegex = regexp.MustCompile(`[0-9]{3,}`) - privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) -) - -func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string { - out = NormalizeNewlines(out) - replacements := GetReplacementsMap(ctx) - if replacements == nil { - t.Fatal("WithReplacementsMap was not called") - } - out = replacements.Replace(out) - out = uuidRegex.ReplaceAllString(out, "") - out = numIdRegex.ReplaceAllString(out, "") - out = privatePathRegex.ReplaceAllString(out, "/tmp/.../$3") - - return out -} - -type key int - -const ( - replacementsMapKey = key(1) -) - -type Replacement struct { - Old string - New string -} - -type ReplacementsContext struct { - Repls []Replacement -} - -func (r *ReplacementsContext) Replace(s string) string { - // QQQ Should probably only replace whole words - for _, repl := range r.Repls { - s = strings.ReplaceAll(s, repl.Old, repl.New) - } - return s -} - -func (r *ReplacementsContext) Set(old, new string) { - if old == "" || new == "" { - return - } - r.Repls = append(r.Repls, Replacement{Old: old, New: new}) -} - -func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return ctx, existingMap - } - } - - newMap := &ReplacementsContext{} - ctx = context.WithValue(ctx, replacementsMapKey, newMap) - return ctx, newMap -} - -func GetReplacementsMap(ctx context.Context) *ReplacementsContext { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return existingMap - } - } - return nil -} - -func PrepareReplacements(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { - // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) - host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") - r.Set(host, "$DATABRICKS_HOST") - r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") - 
r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") - r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") - r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") - r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") - r.Set(w.Config.Token, "$DATABRICKS_TOKEN") - r.Set(w.Config.Username, "$DATABRICKS_USERNAME") - r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") - r.Set(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") - r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") - r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") - r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") - r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") - r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") - // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") - r.Set(w.Config.AzureClientID, "$USERNAME") - r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") - r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") - r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") - r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") - r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") - r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") - r.Set(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") - // This is set to words like "path" that happen too frequently - // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") -} - -func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { - // There could be exact matches or overlap between different name fields, so sort them by length - // to ensure we match the largest one first and map them all to the same token - names := []string{ - u.DisplayName, - u.UserName, - iamutil.GetShortUserName(&u), - u.Name.FamilyName, - u.Name.GivenName, - } - if u.Name != nil { - names = append(names, u.Name.FamilyName) - names = append(names, u.Name.GivenName) - } - for _, val := range u.Emails { - names = append(names, val.Value) - } - stableSortReverseLength(names) - - for _, name := range names { - r.Set(name, "$USERNAME") - } - - for ind, val := range u.Groups { - r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) - } - - r.Set(u.Id, "$USER.Id") - - for ind, val := range u.Roles { - r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) - } -} - -func stableSortReverseLength(strs []string) { - slices.SortStableFunc(strs, func(a, b string) int { - return len(b) - len(a) - }) -} - -func NormalizeNewlines(input string) string { - output := strings.ReplaceAll(input, "\r\n", "\n") - return strings.ReplaceAll(output, "\r", "\n") + testdiff.AssertOutputJQ(t, ctx, out, fmt.Sprintf("Output from %v", args), expectedPath, ignorePaths) } diff --git a/internal/testcli/golden_test.go b/internal/testcli/golden_test.go deleted file mode 100644 index 215bf33d3..000000000 --- a/internal/testcli/golden_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package testcli - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSort(t *testing.T) { - input := []string{"a", "bc", "cd"} - stableSortReverseLength(input) - assert.Equal(t, []string{"bc", "cd", "a"}, input) -} diff --git a/internal/testcli/runner.go b/internal/testcli/runner.go index 95073b57c..f462f44fc 100644 --- a/internal/testcli/runner.go +++ b/internal/testcli/runner.go @@ -6,13 +6,10 @@ import ( "context" "encoding/json" "io" - "reflect" "strings" "sync" "time" - "github.com/spf13/cobra" - "github.com/spf13/pflag" "github.com/stretchr/testify/require" 
"github.com/databricks/cli/cmd" @@ -42,6 +39,8 @@ type Runner struct { StderrLines <-chan string errch <-chan error + + Verbose bool } func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string { @@ -68,38 +67,6 @@ func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan s return ch } -func (r *Runner) registerFlagCleanup(c *cobra.Command) { - // Find target command that will be run. Example: if the command run is `databricks fs cp`, - // target command corresponds to `cp` - targetCmd, _, err := c.Find(r.args) - if err != nil && strings.HasPrefix(err.Error(), "unknown command") { - // even if command is unknown, we can proceed - require.NotNil(r, targetCmd) - } else { - require.NoError(r, err) - } - - // Force initialization of default flags. - // These are initialized by cobra at execution time and would otherwise - // not be cleaned up by the cleanup function below. - targetCmd.InitDefaultHelpFlag() - targetCmd.InitDefaultVersionFlag() - - // Restore flag values to their original value on test completion. - targetCmd.Flags().VisitAll(func(f *pflag.Flag) { - v := reflect.ValueOf(f.Value) - if v.Kind() == reflect.Ptr { - v = v.Elem() - } - // Store copy of the current flag value. - reset := reflect.New(v.Type()).Elem() - reset.Set(v) - r.Cleanup(func() { - v.Set(reset) - }) - }) -} - // Like [Runner.Eventually], but more specific func (r *Runner) WaitForTextPrinted(text string, timeout time.Duration) { r.Eventually(func() bool { @@ -158,12 +125,6 @@ func (r *Runner) RunBackground() { cli.SetIn(r.stdinR) } - // Register cleanup function to restore flags to their original values - // once test has been executed. This is needed because flag values reside - // in a global singleton data-structure, and thus subsequent tests might - // otherwise interfere with each other - r.registerFlagCleanup(cli) - errch := make(chan error) ctx, cancel := context.WithCancel(ctx) @@ -180,7 +141,9 @@ func (r *Runner) RunBackground() { go func() { err := root.Execute(ctx, cli) if err != nil { - r.Logf("Error running command: %s", err) + if r.Verbose { + r.Logf("Error running command: %s", err) + } } // Close pipes to signal EOF. @@ -195,7 +158,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stdout.Bytes())) for scanner.Scan() { - r.Logf("[databricks stdout]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stdout]: %s", scanner.Text()) + } } } @@ -203,16 +168,12 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stderr.Bytes())) for scanner.Scan() { - r.Logf("[databricks stderr]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stderr]: %s", scanner.Text()) + } } } - // Reset context on command for the next test. - // These commands are globals so we have to clean up to the best of our ability after each run. - // See https://github.com/spf13/cobra/blob/a6f198b635c4b18fff81930c40d464904e55b161/command.go#L1062-L1066 - //nolint:staticcheck // cobra sets the context and doesn't clear it - cli.SetContext(nil) - // Make caller aware of error. 
errch <- err close(errch) @@ -230,13 +191,56 @@ func (r *Runner) RunBackground() { } func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { - r.RunBackground() - err := <-r.errch - return r.stdout, r.stderr, err + r.Helper() + var stdout, stderr bytes.Buffer + ctx := cmdio.NewContext(r.ctx, &cmdio.Logger{ + Mode: flags.ModeAppend, + Reader: bufio.Reader{}, + Writer: &stderr, + }) + + cli := cmd.New(ctx) + cli.SetOut(&stdout) + cli.SetErr(&stderr) + cli.SetArgs(r.args) + + if r.Verbose { + r.Logf(" args: %s", strings.Join(r.args, ", ")) + } + + err := root.Execute(ctx, cli) + if err != nil { + if r.Verbose { + r.Logf(" error: %s", err) + } + } + + if stdout.Len() > 0 { + // Make a copy of the buffer such that it remains "unread". + scanner := bufio.NewScanner(bytes.NewBuffer(stdout.Bytes())) + for scanner.Scan() { + if r.Verbose { + r.Logf("stdout: %s", scanner.Text()) + } + } + } + + if stderr.Len() > 0 { + // Make a copy of the buffer such that it remains "unread". + scanner := bufio.NewScanner(bytes.NewBuffer(stderr.Bytes())) + for scanner.Scan() { + if r.Verbose { + r.Logf("stderr: %s", scanner.Text()) + } + } + } + + return stdout, stderr, err } // Like [require.Eventually] but errors if the underlying command has failed. func (r *Runner) Eventually(condition func() bool, waitFor, tick time.Duration, msgAndArgs ...any) { + r.Helper() ch := make(chan bool, 1) timer := time.NewTimer(waitFor) @@ -269,12 +273,14 @@ func (r *Runner) Eventually(condition func() bool, waitFor, tick time.Duration, } func (r *Runner) RunAndExpectOutput(heredoc string) { + r.Helper() stdout, _, err := r.Run() require.NoError(r, err) require.Equal(r, cmdio.Heredoc(heredoc), strings.TrimSpace(stdout.String())) } func (r *Runner) RunAndParseJSON(v any) { + r.Helper() stdout, _, err := r.Run() require.NoError(r, err) err = json.Unmarshal(stdout.Bytes(), &v) @@ -285,13 +291,14 @@ func NewRunner(t testutil.TestingT, ctx context.Context, args ...string) *Runner return &Runner{ TestingT: t, - ctx: ctx, - args: args, + ctx: ctx, + args: args, + Verbose: true, } } func RequireSuccessfulRun(t testutil.TestingT, ctx context.Context, args ...string) (bytes.Buffer, bytes.Buffer) { - t.Logf("run args: [%s]", strings.Join(args, ", ")) + t.Helper() r := NewRunner(t, ctx, args...) stdout, stderr, err := r.Run() require.NoError(t, err) @@ -299,6 +306,7 @@ func RequireSuccessfulRun(t testutil.TestingT, ctx context.Context, args ...stri } func RequireErrorRun(t testutil.TestingT, ctx context.Context, args ...string) (bytes.Buffer, bytes.Buffer, error) { + t.Helper() r := NewRunner(t, ctx, args...) stdout, stderr, err := r.Run() require.Error(t, err) diff --git a/internal/testutil/file.go b/internal/testutil/file.go index 538a3c20a..476c4123a 100644 --- a/internal/testutil/file.go +++ b/internal/testutil/file.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -52,3 +53,31 @@ func ReadFile(t TestingT, path string) string { return string(b) } + +// StatFile returns the file info for a file. +func StatFile(t TestingT, path string) os.FileInfo { + fi, err := os.Stat(path) + require.NoError(t, err) + + return fi +} + +// AssertFileContents asserts that the file at path has the expected content. +func AssertFileContents(t TestingT, path, expected string) bool { + actual := ReadFile(t, path) + return assert.Equal(t, expected, actual) +} + +// AssertFilePermissions asserts that the file at path has the expected permissions. 
+func AssertFilePermissions(t TestingT, path string, expected os.FileMode) bool { + fi := StatFile(t, path) + assert.False(t, fi.Mode().IsDir(), "expected a file, got a directory") + return assert.Equal(t, expected, fi.Mode().Perm(), "expected 0%o, got 0%o", expected, fi.Mode().Perm()) +} + +// AssertDirPermissions asserts that the file at path has the expected permissions. +func AssertDirPermissions(t TestingT, path string, expected os.FileMode) bool { + fi := StatFile(t, path) + assert.True(t, fi.Mode().IsDir(), "expected a directory, got a file") + return assert.Equal(t, expected, fi.Mode().Perm(), "expected 0%o, got 0%o", expected, fi.Mode().Perm()) +} diff --git a/internal/testutil/helpers.go b/internal/testutil/helpers.go index 019a8e618..44c2c9375 100644 --- a/internal/testutil/helpers.go +++ b/internal/testutil/helpers.go @@ -5,6 +5,9 @@ import ( "math/rand" "os" "strings" + "time" + + "github.com/stretchr/testify/require" ) // GetEnvOrSkipTest proceeds with test only with that env variable. @@ -30,3 +33,12 @@ func RandomName(prefix ...string) string { } return string(b) } + +func SkipUntil(t TestingT, date string) { + deadline, err := time.Parse(time.DateOnly, date) + require.NoError(t, err) + + if time.Now().Before(deadline) { + t.Skipf("Skipping test until %s. Time right now: %s", deadline.Format(time.DateOnly), time.Now()) + } +} diff --git a/internal/testutil/interface.go b/internal/testutil/interface.go index 2c3004800..97441212d 100644 --- a/internal/testutil/interface.go +++ b/internal/testutil/interface.go @@ -24,4 +24,6 @@ type TestingT interface { Setenv(key, value string) TempDir() string + + Helper() } diff --git a/libs/auth/cache/file_test.go b/libs/auth/cache/file_test.go index 3e4aae36f..54964bed3 100644 --- a/libs/auth/cache/file_test.go +++ b/libs/auth/cache/file_test.go @@ -42,7 +42,7 @@ func TestStoreAndLookup(t *testing.T) { tok, err := l.Lookup("x") require.NoError(t, err) assert.Equal(t, "abc", tok.AccessToken) - assert.Equal(t, 2, len(l.Tokens)) + assert.Len(t, l.Tokens, 2) _, err = l.Lookup("z") assert.Equal(t, ErrNotConfigured, err) diff --git a/libs/auth/env.go b/libs/auth/env.go new file mode 100644 index 000000000..c58cc53e3 --- /dev/null +++ b/libs/auth/env.go @@ -0,0 +1,26 @@ +package auth + +import "github.com/databricks/databricks-sdk-go/config" + +// Env generates the authentication environment variables we need to set for +// downstream applications from the CLI to work correctly. +func Env(cfg *config.Config) map[string]string { + out := make(map[string]string) + for _, attr := range config.ConfigAttributes { + // Ignore profile so that downstream tools don't try and reload + // the profile. We know the current configuration is already valid since + // otherwise the CLI would have thrown an error when loading it. 
+ if attr.Name == "profile" { + continue + } + if len(attr.EnvVars) == 0 { + continue + } + if attr.IsZero(cfg) { + continue + } + out[attr.EnvVars[0]] = attr.GetString(cfg) + } + + return out +} diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go new file mode 100644 index 000000000..be1cfc7ac --- /dev/null +++ b/libs/auth/env_test.go @@ -0,0 +1,42 @@ +package auth + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" +) + +func TestAuthEnv(t *testing.T) { + in := &config.Config{ + Profile: "thisshouldbeignored", + Host: "https://test.com", + Token: "test-token", + Password: "test-password", + MetadataServiceURL: "http://somurl.com", + + AzureUseMSI: true, + AzureTenantID: "test-tenant-id", + AzureClientID: "test-client-id", + AzureClientSecret: "test-client-secret", + + ActionsIDTokenRequestToken: "test-actions-id-token-request-token", + } + + expected := map[string]string{ + "DATABRICKS_HOST": "https://test.com", + "DATABRICKS_TOKEN": "test-token", + "DATABRICKS_PASSWORD": "test-password", + "DATABRICKS_METADATA_SERVICE_URL": "http://somurl.com", + + "ARM_USE_MSI": "true", + "ARM_TENANT_ID": "test-tenant-id", + "ARM_CLIENT_ID": "test-client-id", + "ARM_CLIENT_SECRET": "test-client-secret", + + "ACTIONS_ID_TOKEN_REQUEST_TOKEN": "test-actions-id-token-request-token", + } + + out := Env(in) + assert.Equal(t, expected, out) +} diff --git a/libs/auth/oauth.go b/libs/auth/oauth.go index 026c45468..1037a5a85 100644 --- a/libs/auth/oauth.go +++ b/libs/auth/oauth.go @@ -107,7 +107,7 @@ func (a *PersistentAuth) Load(ctx context.Context) (*oauth2.Token, error) { func (a *PersistentAuth) ProfileName() string { if a.AccountID != "" { - return fmt.Sprintf("ACCOUNT-%s", a.AccountID) + return "ACCOUNT-" + a.AccountID } host := strings.TrimPrefix(a.Host, "https://") split := strings.Split(host, ".") @@ -210,12 +210,12 @@ func (a *PersistentAuth) oidcEndpoints(ctx context.Context) (*oauthAuthorization prefix := a.key() if a.AccountID != "" { return &oauthAuthorizationServer{ - AuthorizationEndpoint: fmt.Sprintf("%s/v1/authorize", prefix), - TokenEndpoint: fmt.Sprintf("%s/v1/token", prefix), + AuthorizationEndpoint: prefix + "/v1/authorize", + TokenEndpoint: prefix + "/v1/token", }, nil } var oauthEndpoints oauthAuthorizationServer - oidc := fmt.Sprintf("%s/oidc/.well-known/oauth-authorization-server", prefix) + oidc := prefix + "/oidc/.well-known/oauth-authorization-server" err := a.http.Do(ctx, "GET", oidc, httpclient.WithResponseUnmarshal(&oauthEndpoints)) if err != nil { return nil, fmt.Errorf("fetch .well-known: %w", err) @@ -247,7 +247,7 @@ func (a *PersistentAuth) oauth2Config(ctx context.Context) (*oauth2.Config, erro TokenURL: endpoints.TokenEndpoint, AuthStyle: oauth2.AuthStyleInParams, }, - RedirectURL: fmt.Sprintf("http://%s", appRedirectAddr), + RedirectURL: "http://" + appRedirectAddr, Scopes: scopes, }, nil } @@ -258,7 +258,7 @@ func (a *PersistentAuth) oauth2Config(ctx context.Context) (*oauth2.Config, erro func (a *PersistentAuth) key() string { a.Host = strings.TrimSuffix(a.Host, "/") if !strings.HasPrefix(a.Host, "http") { - a.Host = fmt.Sprintf("https://%s", a.Host) + a.Host = "https://" + a.Host } if a.AccountID != "" { return fmt.Sprintf("%s/oidc/accounts/%s", a.Host, a.AccountID) diff --git a/libs/auth/oauth_test.go b/libs/auth/oauth_test.go index 837ff4fee..6c3b9bf47 100644 --- a/libs/auth/oauth_test.go +++ b/libs/auth/oauth_test.go @@ -112,7 +112,7 @@ func TestLoadRefresh(t *testing.T) { }, }.ApplyClient(t, 
func(ctx context.Context, c *client.DatabricksClient) { ctx = useInsecureOAuthHttpClientForTests(ctx) - expectedKey := fmt.Sprintf("%s/oidc/accounts/xyz", c.Config.Host) + expectedKey := c.Config.Host + "/oidc/accounts/xyz" p := &PersistentAuth{ Host: c.Config.Host, AccountID: "xyz", @@ -149,7 +149,7 @@ func TestChallenge(t *testing.T) { }, }.ApplyClient(t, func(ctx context.Context, c *client.DatabricksClient) { ctx = useInsecureOAuthHttpClientForTests(ctx) - expectedKey := fmt.Sprintf("%s/oidc/accounts/xyz", c.Config.Host) + expectedKey := c.Config.Host + "/oidc/accounts/xyz" browserOpened := make(chan string) p := &PersistentAuth{ diff --git a/libs/cmdio/error_event.go b/libs/cmdio/error_event.go index 933f9d0d0..62897995b 100644 --- a/libs/cmdio/error_event.go +++ b/libs/cmdio/error_event.go @@ -1,13 +1,11 @@ package cmdio -import "fmt" - type ErrorEvent struct { Error string `json:"error"` } func (event *ErrorEvent) String() string { - return fmt.Sprintf("Error: %s", event.Error) + return "Error: " + event.Error } func (event *ErrorEvent) IsInplaceSupported() bool { diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index c0e9e868a..11b75157d 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -285,3 +285,14 @@ func fromContext(ctx context.Context) *cmdIO { } return io } + +// Mocks the context with a cmdio object that discards all output. +func MockDiscard(ctx context.Context) context.Context { + return InContext(ctx, &cmdIO{ + interactive: false, + outputFormat: flags.OutputText, + in: io.NopCloser(strings.NewReader("")), + out: io.Discard, + err: io.Discard, + }) +} diff --git a/libs/cmdio/logger.go b/libs/cmdio/logger.go index 7bc95e9a5..48b76ce42 100644 --- a/libs/cmdio/logger.go +++ b/libs/cmdio/logger.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -124,7 +125,7 @@ func splitAtLastNewLine(s string) (string, string) { func (l *Logger) AskSelect(question string, choices []string) (string, error) { if l.Mode == flags.ModeJson { - return "", fmt.Errorf("question prompts are not supported in json mode") + return "", errors.New("question prompts are not supported in json mode") } // Promptui does not support multiline prompts. So we split the question. @@ -140,7 +141,7 @@ func (l *Logger) AskSelect(question string, choices []string) (string, error) { HideHelp: true, Templates: &promptui.SelectTemplates{ Label: "{{.}}: ", - Selected: fmt.Sprintf("%s: {{.}}", last), + Selected: last + ": {{.}}", }, } @@ -153,7 +154,7 @@ func (l *Logger) AskSelect(question string, choices []string) (string, error) { func (l *Logger) Ask(question, defaultVal string) (string, error) { if l.Mode == flags.ModeJson { - return "", fmt.Errorf("question prompts are not supported in json mode") + return "", errors.New("question prompts are not supported in json mode") } // Add default value to question prompt. 
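Several hunks in this area apply the same mechanical cleanup: `fmt.Errorf` and `fmt.Sprintf` calls whose format strings contain no verbs, or only splice in a single string, become `errors.New` and plain concatenation, which skips an unnecessary formatting pass. A minimal before/after sketch with illustrative names:

package main

import (
	"errors"
	"fmt"
)

// askOld shows the shape of the code before the cleanup.
func askOld(jsonMode bool, last string) (string, error) {
	if jsonMode {
		// A constant message does not need a formatting pass.
		return "", fmt.Errorf("question prompts are not supported in json mode")
	}
	return fmt.Sprintf("%s: {{.}}", last), nil
}

// askNew shows the shape after: errors.New for constant messages and
// concatenation when only one string is spliced in.
func askNew(jsonMode bool, last string) (string, error) {
	if jsonMode {
		return "", errors.New("question prompts are not supported in json mode")
	}
	return last + ": {{.}}", nil
}

func main() {
	_, errOld := askOld(true, "")
	_, errNew := askNew(true, "")
	fmt.Println(errOld.Error() == errNew.Error()) // true

	sOld, _ := askOld(false, "Select one")
	sNew, _ := askNew(false, "Select one")
	fmt.Println(sOld == sNew) // true
}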
@@ -188,7 +189,7 @@ func (l *Logger) writeJson(event Event) { // we panic because there we cannot catch this in jobs.RunNowAndWait panic(err) } - _, _ = l.Writer.Write([]byte(b)) + _, _ = l.Writer.Write(b) _, _ = l.Writer.Write([]byte("\n")) } diff --git a/libs/cmdio/render.go b/libs/cmdio/render.go index 1529274a3..1a6aadcfa 100644 --- a/libs/cmdio/render.go +++ b/libs/cmdio/render.go @@ -39,7 +39,7 @@ func Heredoc(tmpl string) (trimmed string) { break } } - for i := 0; i < len(lines); i++ { + for i := range lines { if lines[i] == "" || strings.TrimSpace(lines[i]) == "" { continue } diff --git a/libs/cmdio/render_test.go b/libs/cmdio/render_test.go index f26190a23..51b385b1d 100644 --- a/libs/cmdio/render_test.go +++ b/libs/cmdio/render_test.go @@ -55,7 +55,7 @@ func (d *dummyIterator) Next(ctx context.Context) (*provisioning.Workspace, erro func makeWorkspaces(count int) []*provisioning.Workspace { res := make([]*provisioning.Workspace, 0, count) next := []*provisioning.Workspace{&dummyWorkspace1, &dummyWorkspace2} - for i := 0; i < count; i++ { + for range count { n := next[0] next = append(next[1:], n) res = append(res, n) @@ -74,7 +74,7 @@ func makeIterator(count int) listing.Iterator[*provisioning.Workspace] { func makeBigOutput(count int) string { res := bytes.Buffer{} for _, ws := range makeWorkspaces(count) { - res.Write([]byte(fmt.Sprintf("%d %s\n", ws.WorkspaceId, ws.WorkspaceName))) + res.WriteString(fmt.Sprintf("%d %s\n", ws.WorkspaceId, ws.WorkspaceName)) } return res.String() } diff --git a/libs/databrickscfg/cfgpickers/clusters.go b/libs/databrickscfg/cfgpickers/clusters.go index 6ae7d99c6..ba920b59b 100644 --- a/libs/databrickscfg/cfgpickers/clusters.go +++ b/libs/databrickscfg/cfgpickers/clusters.go @@ -33,7 +33,7 @@ func GetRuntimeVersion(cluster compute.ClusterDetails) (string, bool) { match = dbrSnapshotVersionRegex.FindStringSubmatch(cluster.SparkVersion) if len(match) > 1 { // we return 14.999 for 14.x-snapshot for semver.Compare() to work properly - return fmt.Sprintf("%s.999", match[1]), true + return match[1] + ".999", true } return "", false } @@ -136,7 +136,18 @@ func loadInteractiveClusters(ctx context.Context, w *databricks.WorkspaceClient, promptSpinner := cmdio.Spinner(ctx) promptSpinner <- "Loading list of clusters to select from" defer close(promptSpinner) - all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{}) + all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ + // Maximum page size to optimize for load time. + PageSize: 100, + + // Filter out system clusters. 
+ FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{ + compute.ClusterSourceApi, + compute.ClusterSourceUi, + }, + }, + }) if err != nil { return nil, fmt.Errorf("list clusters: %w", err) } diff --git a/libs/databrickscfg/cfgpickers/clusters_test.go b/libs/databrickscfg/cfgpickers/clusters_test.go index cde09aa44..840916e91 100644 --- a/libs/databrickscfg/cfgpickers/clusters_test.go +++ b/libs/databrickscfg/cfgpickers/clusters_test.go @@ -1,12 +1,10 @@ package cfgpickers import ( - "bytes" "context" "testing" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/qa" "github.com/databricks/databricks-sdk-go/service/compute" @@ -70,7 +68,7 @@ func TestFirstCompatibleCluster(t *testing.T) { cfg, server := qa.HTTPFixtures{ { Method: "GET", - Resource: "/api/2.1/clusters/list?", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100", Response: compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -114,8 +112,8 @@ func TestFirstCompatibleCluster(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) + clusterID, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.NoError(t, err) require.Equal(t, "bcd-id", clusterID) @@ -125,7 +123,7 @@ func TestNoCompatibleClusters(t *testing.T) { cfg, server := qa.HTTPFixtures{ { Method: "GET", - Resource: "/api/2.1/clusters/list?", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100", Response: compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -161,8 +159,7 @@ func TestNoCompatibleClusters(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) _, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.Equal(t, ErrNoCompatibleClusters, err) } diff --git a/libs/databrickscfg/loader.go b/libs/databrickscfg/loader.go index 12a516c59..84c8398bf 100644 --- a/libs/databrickscfg/loader.go +++ b/libs/databrickscfg/loader.go @@ -19,7 +19,7 @@ var errNoMatchingProfiles = errors.New("no matching config profiles found") type errMultipleProfiles []string func (e errMultipleProfiles) Error() string { - return fmt.Sprintf("multiple profiles matched: %s", strings.Join(e, ", ")) + return "multiple profiles matched: " + strings.Join(e, ", ") } func findMatchingProfile(configFile *config.File, matcher func(*ini.Section) bool) (*ini.Section, error) { diff --git a/libs/databrickscfg/ops_test.go b/libs/databrickscfg/ops_test.go index 3ea92024c..dd8484fb7 100644 --- a/libs/databrickscfg/ops_test.go +++ b/libs/databrickscfg/ops_test.go @@ -216,7 +216,7 @@ func TestSaveToProfile_ClearingPreviousProfile(t *testing.T) { dlft, err := file.GetSection("DEFAULT") assert.NoError(t, err) - assert.Len(t, dlft.KeysHash(), 0) + assert.Empty(t, dlft.KeysHash()) abc, err := file.GetSection("abc") assert.NoError(t, err) diff --git 
a/libs/databrickscfg/profile/file_test.go b/libs/databrickscfg/profile/file_test.go index 8e5cfefc0..6bcaec4b7 100644 --- a/libs/databrickscfg/profile/file_test.go +++ b/libs/databrickscfg/profile/file_test.go @@ -11,10 +11,10 @@ import ( ) func TestProfileCloud(t *testing.T) { - assert.Equal(t, Profile{Host: "https://dbc-XXXXXXXX-YYYY.cloud.databricks.com"}.Cloud(), "AWS") - assert.Equal(t, Profile{Host: "https://adb-xxx.y.azuredatabricks.net/"}.Cloud(), "Azure") - assert.Equal(t, Profile{Host: "https://workspace.gcp.databricks.com/"}.Cloud(), "GCP") - assert.Equal(t, Profile{Host: "https://some.invalid.host.com/"}.Cloud(), "AWS") + assert.Equal(t, "AWS", Profile{Host: "https://dbc-XXXXXXXX-YYYY.cloud.databricks.com"}.Cloud()) + assert.Equal(t, "Azure", Profile{Host: "https://adb-xxx.y.azuredatabricks.net/"}.Cloud()) + assert.Equal(t, "GCP", Profile{Host: "https://workspace.gcp.databricks.com/"}.Cloud()) + assert.Equal(t, "AWS", Profile{Host: "https://some.invalid.host.com/"}.Cloud()) } func TestProfilesSearchCaseInsensitive(t *testing.T) { diff --git a/libs/diag/diagnostic.go b/libs/diag/diagnostic.go index a4f8c7b6b..0c7699b4e 100644 --- a/libs/diag/diagnostic.go +++ b/libs/diag/diagnostic.go @@ -86,6 +86,16 @@ func Infof(format string, args ...any) Diagnostics { } } +// Recommendationf creates a new recommendation diagnostic. +func Recommendationf(format string, args ...any) Diagnostics { + return []Diagnostic{ + { + Severity: Recommendation, + Summary: fmt.Sprintf(format, args...), + }, + } +} + // Diagnostics holds zero or more instances of [Diagnostic]. type Diagnostics []Diagnostic diff --git a/libs/dyn/convert/from_typed.go b/libs/dyn/convert/from_typed.go index ed1b85a36..3de017b75 100644 --- a/libs/dyn/convert/from_typed.go +++ b/libs/dyn/convert/from_typed.go @@ -209,7 +209,7 @@ func fromTypedSlice(src reflect.Value, ref dyn.Value) (dyn.Value, error) { } out := make([]dyn.Value, src.Len()) - for i := 0; i < src.Len(); i++ { + for i := range src.Len() { v := src.Index(i) refv := ref.Index(i) diff --git a/libs/dyn/convert/normalize.go b/libs/dyn/convert/normalize.go index 31cd8b6e3..ee26d5afc 100644 --- a/libs/dyn/convert/normalize.go +++ b/libs/dyn/convert/normalize.go @@ -97,7 +97,7 @@ func (n normalizeOptions) normalizeStruct(typ reflect.Type, src dyn.Value, seen if !pv.IsAnchor() { diags = diags.Append(diag.Diagnostic{ Severity: diag.Warning, - Summary: fmt.Sprintf("unknown field: %s", pk.MustString()), + Summary: "unknown field: " + pk.MustString(), // Show all locations the unknown field is defined at. Locations: pk.Locations(), Paths: []dyn.Path{path}, diff --git a/libs/dyn/convert/struct_info.go b/libs/dyn/convert/struct_info.go index f5fd29cb9..1e34008e2 100644 --- a/libs/dyn/convert/struct_info.go +++ b/libs/dyn/convert/struct_info.go @@ -65,7 +65,7 @@ func buildStructInfo(typ reflect.Type) structInfo { } nf := styp.NumField() - for j := 0; j < nf; j++ { + for j := range nf { sf := styp.Field(j) // Recurse into anonymous fields. 
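Aside (editor's illustration, not part of the patch): the new diag.Recommendationf helper above mirrors Infof and returns a one-element Diagnostics slice with Severity set to Recommendation. A minimal sketch of a caller, assuming a hypothetical validation function and message:

package example

import "github.com/databricks/cli/libs/diag"

// validateTimeout is a hypothetical check that surfaces a softer,
// recommendation-severity nudge instead of a warning or an error.
func validateTimeout(seconds int) diag.Diagnostics {
	if seconds == 0 {
		// Recommendationf formats the summary and tags it with the
		// Recommendation severity.
		return diag.Recommendationf("consider setting an explicit timeout instead of %d seconds", seconds)
	}
	return nil
}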
diff --git a/libs/dyn/jsonloader/json.go b/libs/dyn/jsonloader/json.go index 3f2dc859f..708fc401f 100644 --- a/libs/dyn/jsonloader/json.go +++ b/libs/dyn/jsonloader/json.go @@ -3,6 +3,7 @@ package jsonloader import ( "bytes" "encoding/json" + "errors" "fmt" "io" @@ -20,7 +21,7 @@ func LoadJSON(data []byte, source string) (dyn.Value, error) { value, err := decodeValue(decoder, &offset) if err != nil { if err == io.EOF { - err = fmt.Errorf("unexpected end of JSON input") + err = errors.New("unexpected end of JSON input") } return dyn.InvalidValue, fmt.Errorf("error decoding JSON at %s: %v", value.Location(), err) } @@ -57,7 +58,7 @@ func decodeValue(decoder *json.Decoder, o *Offset) (dyn.Value, error) { } key, ok := keyToken.(string) if !ok { - return invalidValueWithLocation(decoder, o), fmt.Errorf("expected string for object key") + return invalidValueWithLocation(decoder, o), errors.New("expected string for object key") } // Get the offset of the key by subtracting the length of the key and the '"' character diff --git a/libs/dyn/location.go b/libs/dyn/location.go index 961d2f121..d2b2ad596 100644 --- a/libs/dyn/location.go +++ b/libs/dyn/location.go @@ -1,6 +1,7 @@ package dyn import ( + "errors" "fmt" "path/filepath" ) @@ -17,7 +18,7 @@ func (l Location) String() string { func (l Location) Directory() (string, error) { if l.File == "" { - return "", fmt.Errorf("no file in location") + return "", errors.New("no file in location") } return filepath.Dir(l.File), nil diff --git a/libs/dyn/mapping_test.go b/libs/dyn/mapping_test.go index 43b24b0c5..d0347d22a 100644 --- a/libs/dyn/mapping_test.go +++ b/libs/dyn/mapping_test.go @@ -1,7 +1,7 @@ package dyn_test import ( - "fmt" + "strconv" "testing" "github.com/databricks/cli/libs/dyn" @@ -185,14 +185,14 @@ func TestMappingClone(t *testing.T) { func TestMappingMerge(t *testing.T) { var m1 dyn.Mapping - for i := 0; i < 10; i++ { - err := m1.Set(dyn.V(fmt.Sprintf("%d", i)), dyn.V(i)) + for i := range 10 { + err := m1.Set(dyn.V(strconv.Itoa(i)), dyn.V(i)) require.NoError(t, err) } var m2 dyn.Mapping for i := 5; i < 15; i++ { - err := m2.Set(dyn.V(fmt.Sprintf("%d", i)), dyn.V(i)) + err := m2.Set(dyn.V(strconv.Itoa(i)), dyn.V(i)) require.NoError(t, err) } diff --git a/libs/dyn/merge/elements_by_key.go b/libs/dyn/merge/elements_by_key.go index e6e640d14..df393003a 100644 --- a/libs/dyn/merge/elements_by_key.go +++ b/libs/dyn/merge/elements_by_key.go @@ -7,7 +7,7 @@ type elementsByKey struct { keyFunc func(dyn.Value) string } -func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { +func (e elementsByKey) doMap(_ dyn.Path, v dyn.Value, mergeFunc func(a, b dyn.Value) (dyn.Value, error)) (dyn.Value, error) { // We know the type of this value is a sequence. // For additional defence, return self if it is not. elements, ok := v.AsSequence() @@ -33,7 +33,7 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { } // Merge this instance into the reference. 
- nv, err := Merge(ref, elements[i]) + nv, err := mergeFunc(ref, elements[i]) if err != nil { return v, err } @@ -55,6 +55,26 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.NewValue(out, v.Locations()), nil } +func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { + return e.doMap(nil, v, Merge) +} + +func (e elementsByKey) MapWithOverride(p dyn.Path, v dyn.Value) (dyn.Value, error) { + return e.doMap(nil, v, func(a, b dyn.Value) (dyn.Value, error) { + return Override(a, b, OverrideVisitor{ + VisitInsert: func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { + return v, nil + }, + VisitDelete: func(valuePath dyn.Path, left dyn.Value) error { + return nil + }, + VisitUpdate: func(_ dyn.Path, a, b dyn.Value) (dyn.Value, error) { + return b, nil + }, + }) + }) +} + // ElementsByKey returns a [dyn.MapFunc] that operates on a sequence // where each element is a map. It groups elements by a key and merges // elements with the same key. @@ -65,3 +85,7 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { func ElementsByKey(key string, keyFunc func(dyn.Value) string) dyn.MapFunc { return elementsByKey{key, keyFunc}.Map } + +func ElementsByKeyWithOverride(key string, keyFunc func(dyn.Value) string) dyn.MapFunc { + return elementsByKey{key, keyFunc}.MapWithOverride +} diff --git a/libs/dyn/merge/elements_by_key_test.go b/libs/dyn/merge/elements_by_key_test.go index ef316cc66..09efece07 100644 --- a/libs/dyn/merge/elements_by_key_test.go +++ b/libs/dyn/merge/elements_by_key_test.go @@ -50,3 +50,42 @@ func TestElementByKey(t *testing.T) { }, ) } + +func TestElementByKeyWithOverride(t *testing.T) { + vin := dyn.V([]dyn.Value{ + dyn.V(map[string]dyn.Value{ + "key": dyn.V("foo"), + "value": dyn.V(42), + }), + dyn.V(map[string]dyn.Value{ + "key": dyn.V("bar"), + "value": dyn.V(43), + }), + dyn.V(map[string]dyn.Value{ + "key": dyn.V("foo"), + "othervalue": dyn.V(44), + }), + }) + + keyFunc := func(v dyn.Value) string { + return strings.ToLower(v.MustString()) + } + + vout, err := dyn.MapByPath(vin, dyn.EmptyPath, ElementsByKeyWithOverride("key", keyFunc)) + require.NoError(t, err) + assert.Len(t, vout.MustSequence(), 2) + assert.Equal(t, + vout.Index(0).AsAny(), + map[string]any{ + "key": "foo", + "othervalue": 44, + }, + ) + assert.Equal(t, + vout.Index(1).AsAny(), + map[string]any{ + "key": "bar", + "value": 43, + }, + ) +} diff --git a/libs/dyn/merge/override.go b/libs/dyn/merge/override.go index ca62c7305..1e49d5544 100644 --- a/libs/dyn/merge/override.go +++ b/libs/dyn/merge/override.go @@ -165,7 +165,7 @@ func overrideSequence(basePath dyn.Path, left, right []dyn.Value, visitor Overri minLen := min(len(left), len(right)) var values []dyn.Value - for i := 0; i < minLen; i++ { + for i := range minLen { path := basePath.Append(dyn.Index(i)) merged, err := override(path, left[i], right[i], visitor) if err != nil { diff --git a/libs/dyn/merge/override_test.go b/libs/dyn/merge/override_test.go index ea161d27c..d9d3f3983 100644 --- a/libs/dyn/merge/override_test.go +++ b/libs/dyn/merge/override_test.go @@ -1,7 +1,7 @@ package merge import ( - "fmt" + "errors" "testing" "time" @@ -373,7 +373,7 @@ func TestOverride_Primitive(t *testing.T) { if modified { t.Run(tc.name+" - visitor has error", func(t *testing.T) { - _, visitor := createVisitor(visitorOpts{error: fmt.Errorf("unexpected change in test")}) + _, visitor := createVisitor(visitorOpts{error: errors.New("unexpected change in test")}) _, err := override(dyn.EmptyPath, tc.left, 
tc.right, visitor) assert.EqualError(t, err, "unexpected change in test") diff --git a/libs/dyn/path.go b/libs/dyn/path.go index 76377e2dc..25bff5070 100644 --- a/libs/dyn/path.go +++ b/libs/dyn/path.go @@ -65,7 +65,7 @@ func (p Path) Equal(q Path) bool { if pl != ql { return false } - for i := 0; i < pl; i++ { + for i := range pl { if p[i] != q[i] { return false } @@ -81,7 +81,7 @@ func (p Path) HasPrefix(q Path) bool { if pl < ql { return false } - for i := 0; i < ql; i++ { + for i := range ql { if p[i] != q[i] { return false } diff --git a/libs/dyn/path_string_test.go b/libs/dyn/path_string_test.go index 0d64bf110..eb1816d7d 100644 --- a/libs/dyn/path_string_test.go +++ b/libs/dyn/path_string_test.go @@ -1,7 +1,7 @@ package dyn_test import ( - "fmt" + "errors" "testing" . "github.com/databricks/cli/libs/dyn" @@ -52,31 +52,31 @@ func TestNewPathFromString(t *testing.T) { }, { input: "foo[123", - err: fmt.Errorf("invalid path: foo[123"), + err: errors.New("invalid path: foo[123"), }, { input: "foo[123]]", - err: fmt.Errorf("invalid path: foo[123]]"), + err: errors.New("invalid path: foo[123]]"), }, { input: "foo[[123]", - err: fmt.Errorf("invalid path: foo[[123]"), + err: errors.New("invalid path: foo[[123]"), }, { input: "foo[[123]]", - err: fmt.Errorf("invalid path: foo[[123]]"), + err: errors.New("invalid path: foo[[123]]"), }, { input: "foo[foo]", - err: fmt.Errorf("invalid path: foo[foo]"), + err: errors.New("invalid path: foo[foo]"), }, { input: "foo..bar", - err: fmt.Errorf("invalid path: foo..bar"), + err: errors.New("invalid path: foo..bar"), }, { input: "foo.bar.", - err: fmt.Errorf("invalid path: foo.bar."), + err: errors.New("invalid path: foo.bar."), }, { // Every component may have a leading dot. @@ -86,7 +86,7 @@ func TestNewPathFromString(t *testing.T) { { // But after an index there must be a dot. input: "foo[1]bar", - err: fmt.Errorf("invalid path: foo[1]bar"), + err: errors.New("invalid path: foo[1]bar"), }, } { p, err := NewPathFromString(tc.input) diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 0a867375d..a33ecd38e 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -81,7 +81,7 @@ func (v Value) AsInt() (int64, bool) { case int32: return int64(vv), true case int64: - return int64(vv), true + return vv, true default: return 0, false } diff --git a/libs/dyn/visit_map_test.go b/libs/dyn/visit_map_test.go index d62327d6f..ad091743d 100644 --- a/libs/dyn/visit_map_test.go +++ b/libs/dyn/visit_map_test.go @@ -1,6 +1,7 @@ package dyn_test import ( + "errors" "fmt" "testing" @@ -71,7 +72,7 @@ func TestMapFuncOnMap(t *testing.T) { }, vbar.AsAny()) // Return error from map function. - ref := fmt.Errorf("error") + ref := errors.New("error") verr, err := dyn.MapByPath(vin, dyn.NewPath(dyn.Key("foo")), func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.InvalidValue, ref }) @@ -87,12 +88,12 @@ func TestMapFuncOnMapWithEmptySequence(t *testing.T) { dyn.V([]dyn.Value{dyn.V(42)}), } - for i := 0; i < len(variants); i++ { + for i := range variants { vin := dyn.V(map[string]dyn.Value{ "key": variants[i], }) - for j := 0; j < len(variants); j++ { + for j := range variants { vout, err := dyn.MapByPath(vin, dyn.NewPath(dyn.Key("key")), func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return variants[j], nil }) @@ -137,7 +138,7 @@ func TestMapFuncOnSequence(t *testing.T) { assert.Equal(t, []any{42, 45}, v1.AsAny()) // Return error from map function. 
- ref := fmt.Errorf("error") + ref := errors.New("error") verr, err := dyn.MapByPath(vin, dyn.NewPath(dyn.Index(0)), func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.InvalidValue, ref }) @@ -153,12 +154,12 @@ func TestMapFuncOnSequenceWithEmptySequence(t *testing.T) { dyn.V([]dyn.Value{dyn.V(42)}), } - for i := 0; i < len(variants); i++ { + for i := range variants { vin := dyn.V([]dyn.Value{ variants[i], }) - for j := 0; j < len(variants); j++ { + for j := range variants { vout, err := dyn.MapByPath(vin, dyn.NewPath(dyn.Index(0)), func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return variants[j], nil }) @@ -211,7 +212,7 @@ func TestMapForeachOnMapError(t *testing.T) { }) // Check that an error from the map function propagates. - ref := fmt.Errorf("error") + ref := errors.New("error") _, err := dyn.Map(vin, ".", dyn.Foreach(func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.InvalidValue, ref })) @@ -255,7 +256,7 @@ func TestMapForeachOnSequenceError(t *testing.T) { }) // Check that an error from the map function propagates. - ref := fmt.Errorf("error") + ref := errors.New("error") _, err := dyn.Map(vin, ".", dyn.Foreach(func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.InvalidValue, ref })) diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index 7398e2594..a7838ff36 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -123,9 +123,9 @@ func (s *saver) toYamlNodeWithStyle(v dyn.Value, style yaml.Style) (*yaml.Node, } return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustString(), Style: style}, nil case dyn.KindBool: - return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustBool()), Style: style}, nil + return &yaml.Node{Kind: yaml.ScalarNode, Value: strconv.FormatBool(v.MustBool()), Style: style}, nil case dyn.KindInt: - return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustInt()), Style: style}, nil + return &yaml.Node{Kind: yaml.ScalarNode, Value: strconv.FormatInt(v.MustInt(), 10), Style: style}, nil case dyn.KindFloat: return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustFloat()), Style: style}, nil case dyn.KindTime: diff --git a/libs/env/context.go b/libs/env/context.go index af4d1afa0..37b76147a 100644 --- a/libs/env/context.go +++ b/libs/env/context.go @@ -65,7 +65,7 @@ func Set(ctx context.Context, key, value string) context.Context { return setMap(ctx, m) } -func homeEnvVar() string { +func HomeEnvVar() string { if runtime.GOOS == "windows" { return "USERPROFILE" } @@ -73,14 +73,14 @@ func homeEnvVar() string { } func WithUserHomeDir(ctx context.Context, value string) context.Context { - return Set(ctx, homeEnvVar(), value) + return Set(ctx, HomeEnvVar(), value) } // ErrNoHomeEnv indicates the absence of $HOME env variable var ErrNoHomeEnv = errors.New("$HOME is not set") func UserHomeDir(ctx context.Context) (string, error) { - home := Get(ctx, homeEnvVar()) + home := Get(ctx, HomeEnvVar()) if home == "" { return "", ErrNoHomeEnv } diff --git a/libs/errs/aggregate_test.go b/libs/errs/aggregate_test.go index a307e956f..216276a06 100644 --- a/libs/errs/aggregate_test.go +++ b/libs/errs/aggregate_test.go @@ -2,36 +2,35 @@ package errs import ( "errors" - "fmt" "testing" "github.com/stretchr/testify/assert" ) func TestFromManyErrors(t *testing.T) { - e1 := fmt.Errorf("Error 1") - e2 := fmt.Errorf("Error 2") - e3 := fmt.Errorf("Error 3") + e1 := errors.New("Error 1") + e2 := errors.New("Error 2") + e3 := errors.New("Error 3") err := FromMany(e1, e2, e3) - assert.True(t, 
errors.Is(err, e1)) - assert.True(t, errors.Is(err, e2)) - assert.True(t, errors.Is(err, e3)) + assert.ErrorIs(t, err, e1) + assert.ErrorIs(t, err, e2) + assert.ErrorIs(t, err, e3) - assert.Equal(t, err.Error(), `Error 1 + assert.Equal(t, `Error 1 Error 2 -Error 3`) +Error 3`, err.Error()) } func TestFromManyErrorsWihtNil(t *testing.T) { - e1 := fmt.Errorf("Error 1") + e1 := errors.New("Error 1") var e2 error = nil - e3 := fmt.Errorf("Error 3") + e3 := errors.New("Error 3") err := FromMany(e1, e2, e3) - assert.True(t, errors.Is(err, e1)) - assert.True(t, errors.Is(err, e3)) + assert.ErrorIs(t, err, e1) + assert.ErrorIs(t, err, e3) - assert.Equal(t, err.Error(), `Error 1 -Error 3`) + assert.Equal(t, `Error 1 +Error 3`, err.Error()) } diff --git a/libs/exec/exec_test.go b/libs/exec/exec_test.go index e75c158bd..f245f9dd1 100644 --- a/libs/exec/exec_test.go +++ b/libs/exec/exec_test.go @@ -85,7 +85,7 @@ func testExecutorWithShell(t *testing.T, shell string) { // Create temporary directory with only the shell executable in the PATH. tmpDir := t.TempDir() - t.Setenv("PATH", tmpDir) + t.Setenv("PATH", fmt.Sprintf("%s%c%s", tmpDir, os.PathListSeparator, os.Getenv("PATH"))) if runtime.GOOS == "windows" { err = os.Symlink(p, fmt.Sprintf("%s/%s.exe", tmpDir, shell)) require.NoError(t, err) @@ -141,7 +141,7 @@ func TestMultipleCommandsRunInParrallel(t *testing.T) { const count = 5 var wg sync.WaitGroup - for i := 0; i < count; i++ { + for i := range count { wg.Add(1) cmd, err := executor.StartCommand(context.Background(), fmt.Sprintf("echo 'Hello %d'", i)) go func(cmd Command, i int) { diff --git a/libs/exec/shell_cmd.go b/libs/exec/shell_cmd.go index 164d09739..057ed06a4 100644 --- a/libs/exec/shell_cmd.go +++ b/libs/exec/shell_cmd.go @@ -2,7 +2,6 @@ package exec import ( "errors" - "fmt" osexec "os/exec" ) @@ -18,7 +17,7 @@ func (s cmdShell) prepare(command string) (*execContext, error) { return &execContext{ executable: s.executable, - args: []string{"/D", "/E:ON", "/V:OFF", "/S", "/C", fmt.Sprintf(`CALL %s`, filename)}, + args: []string{"/D", "/E:ON", "/V:OFF", "/S", "/C", "CALL " + filename}, scriptFile: filename, }, nil } diff --git a/libs/fakefs/fakefs.go b/libs/fakefs/fakefs.go index a8d5eb873..050ee2d6e 100644 --- a/libs/fakefs/fakefs.go +++ b/libs/fakefs/fakefs.go @@ -1,12 +1,12 @@ package fakefs import ( - "fmt" + "errors" "io/fs" "time" ) -var ErrNotImplemented = fmt.Errorf("not implemented") +var ErrNotImplemented = errors.New("not implemented") // DirEntry is a fake implementation of [fs.DirEntry]. 
type DirEntry struct { diff --git a/libs/filer/completer/completer_test.go b/libs/filer/completer/completer_test.go index d284447b9..865d34c2f 100644 --- a/libs/filer/completer/completer_test.go +++ b/libs/filer/completer/completer_test.go @@ -37,7 +37,7 @@ func TestFilerCompleterSetsPrefix(t *testing.T) { assert.Equal(t, []string{"dbfs:/dir/dirA/", "dbfs:/dir/dirB/"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterReturnsNestedDirs(t *testing.T) { @@ -46,7 +46,7 @@ func TestFilerCompleterReturnsNestedDirs(t *testing.T) { assert.Equal(t, []string{"dir/dirA/", "dir/dirB/"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterReturnsAdjacentDirs(t *testing.T) { @@ -55,7 +55,7 @@ func TestFilerCompleterReturnsAdjacentDirs(t *testing.T) { assert.Equal(t, []string{"dir/dirA/", "dir/dirB/"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterReturnsNestedDirsAndFiles(t *testing.T) { @@ -64,7 +64,7 @@ func TestFilerCompleterReturnsNestedDirsAndFiles(t *testing.T) { assert.Equal(t, []string{"dir/dirA/", "dir/dirB/", "dir/fileA"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterAddsDbfsPath(t *testing.T) { @@ -78,7 +78,7 @@ func TestFilerCompleterAddsDbfsPath(t *testing.T) { assert.Equal(t, []string{"dir/dirA/", "dir/dirB/", "dbfs:/"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterWindowsSeparator(t *testing.T) { @@ -92,7 +92,7 @@ func TestFilerCompleterWindowsSeparator(t *testing.T) { assert.Equal(t, []string{"dir\\dirA\\", "dir\\dirB\\", "dbfs:/"}, completions) assert.Equal(t, cobra.ShellCompDirectiveNoSpace, directive) - assert.Nil(t, err) + assert.NoError(t, err) } func TestFilerCompleterNoCompletions(t *testing.T) { diff --git a/libs/filer/fake_filer.go b/libs/filer/fake_filer.go index 76b8bcd94..1e1cbd985 100644 --- a/libs/filer/fake_filer.go +++ b/libs/filer/fake_filer.go @@ -2,7 +2,7 @@ package filer import ( "context" - "fmt" + "errors" "io" "io/fs" "path" @@ -17,7 +17,7 @@ type FakeFiler struct { } func (f *FakeFiler) Write(ctx context.Context, p string, reader io.Reader, mode ...WriteMode) error { - return fmt.Errorf("not implemented") + return errors.New("not implemented") } func (f *FakeFiler) Read(ctx context.Context, p string) (io.ReadCloser, error) { @@ -30,7 +30,7 @@ func (f *FakeFiler) Read(ctx context.Context, p string) (io.ReadCloser, error) { } func (f *FakeFiler) Delete(ctx context.Context, p string, mode ...DeleteMode) error { - return fmt.Errorf("not implemented") + return errors.New("not implemented") } func (f *FakeFiler) ReadDir(ctx context.Context, p string) ([]fs.DirEntry, error) { @@ -59,7 +59,7 @@ func (f *FakeFiler) ReadDir(ctx context.Context, p string) ([]fs.DirEntry, error } func (f *FakeFiler) Mkdir(ctx context.Context, path string) error { - return fmt.Errorf("not implemented") + return errors.New("not implemented") } func (f *FakeFiler) Stat(ctx context.Context, path string) (fs.FileInfo, error) { diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 83dc560cb..372c82929 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -2,7 +2,6 @@ package filer import ( "context" - "fmt" 
"io" "io/fs" ) @@ -36,7 +35,7 @@ type FileAlreadyExistsError struct { } func (err FileAlreadyExistsError) Error() string { - return fmt.Sprintf("file already exists: %s", err.path) + return "file already exists: " + err.path } func (err FileAlreadyExistsError) Is(other error) bool { @@ -52,7 +51,7 @@ func (err FileDoesNotExistError) Is(other error) bool { } func (err FileDoesNotExistError) Error() string { - return fmt.Sprintf("file does not exist: %s", err.path) + return "file does not exist: " + err.path } type NoSuchDirectoryError struct { @@ -60,7 +59,7 @@ type NoSuchDirectoryError struct { } func (err NoSuchDirectoryError) Error() string { - return fmt.Sprintf("no such directory: %s", err.path) + return "no such directory: " + err.path } func (err NoSuchDirectoryError) Is(other error) bool { @@ -72,7 +71,7 @@ type NotADirectory struct { } func (err NotADirectory) Error() string { - return fmt.Sprintf("not a directory: %s", err.path) + return "not a directory: " + err.path } func (err NotADirectory) Is(other error) bool { @@ -84,7 +83,7 @@ type NotAFile struct { } func (err NotAFile) Error() string { - return fmt.Sprintf("not a file: %s", err.path) + return "not a file: " + err.path } func (err NotAFile) Is(other error) bool { @@ -96,7 +95,7 @@ type DirectoryNotEmptyError struct { } func (err DirectoryNotEmptyError) Error() string { - return fmt.Sprintf("directory not empty: %s", err.path) + return "directory not empty: " + err.path } func (err DirectoryNotEmptyError) Is(other error) bool { @@ -118,7 +117,7 @@ type PermissionError struct { } func (err PermissionError) Error() string { - return fmt.Sprintf("access denied: %s", err.path) + return "access denied: " + err.path } func (err PermissionError) Is(other error) bool { diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 7ea1d0f03..88bbadd32 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -116,10 +116,7 @@ func (w *FilesClient) urlPath(name string) (string, string, error) { } // The user specified part of the path must be escaped. - urlPath := fmt.Sprintf( - "/api/2.0/fs/files/%s", - url.PathEscape(strings.TrimLeft(absPath, "/")), - ) + urlPath := "/api/2.0/fs/files/" + url.PathEscape(strings.TrimLeft(absPath, "/")) return absPath, urlPath, nil } @@ -306,8 +303,6 @@ func (w *FilesClient) recursiveDelete(ctx context.Context, name string) error { group.SetLimit(maxFilesRequestsInFlight) for _, file := range filesToDelete { - file := file - // Skip the file if the context has already been cancelled. select { case <-groupCtx.Done(): diff --git a/libs/filer/fs_test.go b/libs/filer/fs_test.go index 849cf6f7c..6168af39a 100644 --- a/libs/filer/fs_test.go +++ b/libs/filer/fs_test.go @@ -63,7 +63,7 @@ func TestFsOpenFile(t *testing.T) { assert.Equal(t, "fileA", info.Name()) assert.Equal(t, int64(3), info.Size()) assert.Equal(t, fs.FileMode(0), info.Mode()) - assert.Equal(t, false, info.IsDir()) + assert.False(t, info.IsDir()) // Read until closed. b := make([]byte, 3) @@ -91,7 +91,7 @@ func TestFsOpenDir(t *testing.T) { info, err := fakeFile.Stat() require.NoError(t, err) assert.Equal(t, "root", info.Name()) - assert.Equal(t, true, info.IsDir()) + assert.True(t, info.IsDir()) de, ok := fakeFile.(fs.ReadDirFile) require.True(t, ok) @@ -107,7 +107,7 @@ func TestFsOpenDir(t *testing.T) { de.Close() - for i := 0; i < 3; i++ { + for range 3 { tmp, err = de.ReadDir(1) require.NoError(t, err) entries = append(entries, tmp...) 
diff --git a/libs/filer/workspace_files_cache_test.go b/libs/filer/workspace_files_cache_test.go index 8983c5982..a73f415c1 100644 --- a/libs/filer/workspace_files_cache_test.go +++ b/libs/filer/workspace_files_cache_test.go @@ -2,7 +2,7 @@ package filer import ( "context" - "fmt" + "errors" "io" "io/fs" "testing" @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/assert" ) -var errNotImplemented = fmt.Errorf("not implemented") +var errNotImplemented = errors.New("not implemented") type cacheTestFiler struct { calls int diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 9e0a7ce50..8d5148edd 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -195,7 +195,7 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io // This API returns 400 if the file already exists, when the object type is notebook regex := regexp.MustCompile(`Path \((.*)\) already exists.`) - if aerr.StatusCode == http.StatusBadRequest && regex.Match([]byte(aerr.Message)) { + if aerr.StatusCode == http.StatusBadRequest && regex.MatchString(aerr.Message) { // Parse file path from regex capture group matches := regex.FindStringSubmatch(aerr.Message) if len(matches) == 2 { diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 9ee2722e1..0127d180c 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -16,7 +16,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" ) -type workspaceFilesExtensionsClient struct { +type WorkspaceFilesExtensionsClient struct { workspaceClient *databricks.WorkspaceClient wsfs Filer @@ -32,7 +32,7 @@ type workspaceFileStatus struct { nameForWorkspaceAPI string } -func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { info, err := w.wsfs.Stat(ctx, name) if err != nil { return wsfsFileInfo{}, err @@ -42,7 +42,7 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. 
-func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { ext := path.Ext(name) nameWithoutExt := strings.TrimSuffix(name, ext) @@ -104,7 +104,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex }, nil } -func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { stat, err := w.stat(ctx, name) if err != nil { return nil, err @@ -184,7 +184,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin filer = newWorkspaceFilesReadaheadCache(filer) } - return &workspaceFilesExtensionsClient{ + return &WorkspaceFilesExtensionsClient{ workspaceClient: w, wsfs: filer, @@ -193,7 +193,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin }, nil } -func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { +func (w *WorkspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { entries, err := w.wsfs.ReadDir(ctx, name) if err != nil { return nil, err @@ -235,7 +235,7 @@ func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name strin // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. -func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { +func (w *WorkspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { if w.readonly { return ReadOnlyError{"write"} } @@ -244,7 +244,7 @@ func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, } // Try to read the file as a regular file. If the file is not found, try to read it as a notebook. -func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { +func (w *WorkspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { // Ensure that the file / notebook exists. We do this check here to avoid reading // the content of a notebook called `foo` when the user actually wanted // to read the content of a file called `foo`. @@ -283,7 +283,7 @@ func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) } // Try to delete the file as a regular file. If the file is not found, try to delete it as a notebook. -func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { +func (w *WorkspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { if w.readonly { return ReadOnlyError{"delete"} } @@ -320,7 +320,7 @@ func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string } // Try to stat the file as a regular file. If the file is not found, try to stat it as a notebook. 
-func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { info, err := w.wsfs.Stat(ctx, name) // If the file is not found, it might be a notebook. @@ -361,7 +361,7 @@ func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. -func (w *workspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { +func (w *WorkspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { if w.readonly { return ReadOnlyError{"mkdir"} } diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 10a2bebf0..9ea837fa9 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -181,7 +181,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { root: NewWorkspaceRootPath("/dir"), } - workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{ + workspaceFilesExtensionsClient := WorkspaceFilesExtensionsClient{ workspaceClient: mockedWorkspaceClient.WorkspaceClient, wsfs: &workspaceFilesClient, } diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index 9eb786db9..0224b2547 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -52,7 +52,7 @@ func TestGlobFileset(t *testing.T) { files, err = g.Files() require.NoError(t, err) - require.Equal(t, len(files), 0) + require.Empty(t, files) } func TestGlobFilesetWithRelativeRoot(t *testing.T) { diff --git a/libs/flags/json_flag_test.go b/libs/flags/json_flag_test.go index b31324011..4bebf8b68 100644 --- a/libs/flags/json_flag_test.go +++ b/libs/flags/json_flag_test.go @@ -1,7 +1,6 @@ package flags import ( - "fmt" "os" "path" "testing" @@ -68,7 +67,7 @@ func TestJsonFlagFile(t *testing.T) { fpath = f.Name() } - err := body.Set(fmt.Sprintf("@%s", fpath)) + err := body.Set("@" + fpath) require.NoError(t, err) diags := body.Unmarshal(&request) @@ -123,12 +122,12 @@ func TestJsonUnmarshalForRequest(t *testing.T) { assert.Equal(t, "new job", r.NewSettings.Name) assert.Equal(t, 0, r.NewSettings.TimeoutSeconds) assert.Equal(t, 1, r.NewSettings.MaxConcurrentRuns) - assert.Equal(t, 1, len(r.NewSettings.Tasks)) + assert.Len(t, r.NewSettings.Tasks, 1) assert.Equal(t, "new task", r.NewSettings.Tasks[0].TaskKey) assert.Equal(t, 0, r.NewSettings.Tasks[0].TimeoutSeconds) assert.Equal(t, 0, r.NewSettings.Tasks[0].MaxRetries) assert.Equal(t, 0, r.NewSettings.Tasks[0].MinRetryIntervalMillis) - assert.Equal(t, true, r.NewSettings.Tasks[0].RetryOnTimeout) + assert.True(t, r.NewSettings.Tasks[0].RetryOnTimeout) } const incorrectJsonData = `{ @@ -280,8 +279,8 @@ func TestJsonUnmarshalForRequestWithForceSendFields(t *testing.T) { assert.NoError(t, diags.Error()) assert.Empty(t, diags) - assert.Equal(t, false, r.NewSettings.NotificationSettings.NoAlertForSkippedRuns) - assert.Equal(t, false, r.NewSettings.NotificationSettings.NoAlertForCanceledRuns) + assert.False(t, r.NewSettings.NotificationSettings.NoAlertForSkippedRuns) + assert.False(t, r.NewSettings.NotificationSettings.NoAlertForCanceledRuns) assert.NotContains(t, r.NewSettings.NotificationSettings.ForceSendFields, "NoAlertForSkippedRuns") 
assert.Contains(t, r.NewSettings.NotificationSettings.ForceSendFields, "NoAlertForCanceledRuns") } diff --git a/libs/flags/output.go b/libs/flags/output.go index 17da144bd..e0c799131 100644 --- a/libs/flags/output.go +++ b/libs/flags/output.go @@ -1,6 +1,7 @@ package flags import ( + "errors" "fmt" "strings" @@ -25,7 +26,7 @@ func (f *Output) Set(s string) error { case `json`, `text`: *f = Output(lower) default: - return fmt.Errorf("accepted arguments are json and text") + return errors.New("accepted arguments are json and text") } return nil } diff --git a/libs/folders/folders_test.go b/libs/folders/folders_test.go index 17afc4022..d2afc4f2d 100644 --- a/libs/folders/folders_test.go +++ b/libs/folders/folders_test.go @@ -33,6 +33,6 @@ func TestFindDirWithLeaf(t *testing.T) { { out, err := FindDirWithLeaf(root, "this-leaf-doesnt-exist-anywhere") assert.ErrorIs(t, err, os.ErrNotExist) - assert.Equal(t, out, "") + assert.Equal(t, "", out) } } diff --git a/libs/git/reference.go b/libs/git/reference.go index e1126d4f2..6001d70de 100644 --- a/libs/git/reference.go +++ b/libs/git/reference.go @@ -13,8 +13,8 @@ import ( type ReferenceType string var ( - ErrNotAReferencePointer = fmt.Errorf("HEAD does not point to another reference") - ErrNotABranch = fmt.Errorf("HEAD is not a reference to a git branch") + ErrNotAReferencePointer = errors.New("HEAD does not point to another reference") + ErrNotABranch = errors.New("HEAD is not a reference to a git branch") ) const ( diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go index 857df65a9..58a540190 100644 --- a/libs/git/repository_test.go +++ b/libs/git/repository_test.go @@ -1,7 +1,6 @@ package git import ( - "fmt" "os" "path/filepath" "strings" @@ -96,8 +95,7 @@ func (testRepo *testRepository) addOriginUrl(url string) { defer f.Close() _, err = f.WriteString( - fmt.Sprintf(`[remote "origin"] - url = %s`, url)) + "[remote \"origin\"]\n\turl = " + url) require.NoError(testRepo.t, err) // reload config to reflect the remote url diff --git a/libs/git/worktree_test.go b/libs/git/worktree_test.go index 3d620c483..072a9d348 100644 --- a/libs/git/worktree_test.go +++ b/libs/git/worktree_test.go @@ -53,12 +53,12 @@ func TestWorktreeResolveGitDir(t *testing.T) { writeGitCommonDir(t, dir, "../..") t.Run("relative", func(t *testing.T) { - writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + writeGitDir(t, dir, "gitdir: "+"../.git/worktrees/my_worktree") verifyCorrectDirs(t, dir) }) t.Run("absolute", func(t *testing.T) { - writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", filepath.Join(dir, ".git/worktrees/my_worktree"))) + writeGitDir(t, dir, "gitdir: "+filepath.Join(dir, ".git/worktrees/my_worktree")) verifyCorrectDirs(t, dir) }) @@ -77,7 +77,7 @@ func TestWorktreeResolveGitDir(t *testing.T) { func TestWorktreeResolveCommonDir(t *testing.T) { dir := setupWorktree(t) - writeGitDir(t, dir, fmt.Sprintf("gitdir: %s", "../.git/worktrees/my_worktree")) + writeGitDir(t, dir, "gitdir: "+"../.git/worktrees/my_worktree") t.Run("relative", func(t *testing.T) { writeGitCommonDir(t, dir, "../..") diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go index 18a2b3ba5..6f8f39d96 100644 --- a/libs/jsonschema/from_type.go +++ b/libs/jsonschema/from_type.go @@ -211,7 +211,7 @@ func getStructFields(typ reflect.Type) []reflect.StructField { fields := []reflect.StructField{} bfsQueue := list.New() - for i := 0; i < typ.NumField(); i++ { + for i := range typ.NumField() { bfsQueue.PushBack(typ.Field(i)) } for 
bfsQueue.Len() > 0 { @@ -233,7 +233,7 @@ func getStructFields(typ reflect.Type) []reflect.StructField { fieldType = fieldType.Elem() } - for i := 0; i < fieldType.NumField(); i++ { + for i := range fieldType.NumField() { bfsQueue.PushBack(fieldType.Field(i)) } } diff --git a/libs/jsonschema/instance.go b/libs/jsonschema/instance.go index 4440a2fe2..eb36822a0 100644 --- a/libs/jsonschema/instance.go +++ b/libs/jsonschema/instance.go @@ -2,6 +2,7 @@ package jsonschema import ( "encoding/json" + "errors" "fmt" "os" "slices" @@ -149,7 +150,7 @@ func (s *Schema) validateAnyOf(instance map[string]any) error { // According to the JSON schema RFC, anyOf must contain at least one schema. // https://json-schema.org/draft/2020-12/json-schema-core if len(s.AnyOf) == 0 { - return fmt.Errorf("anyOf must contain at least one schema") + return errors.New("anyOf must contain at least one schema") } for _, anyOf := range s.AnyOf { @@ -158,5 +159,5 @@ func (s *Schema) validateAnyOf(instance map[string]any) error { return nil } } - return fmt.Errorf("instance does not match any of the schemas in anyOf") + return errors.New("instance does not match any of the schemas in anyOf") } diff --git a/libs/jsonschema/utils.go b/libs/jsonschema/utils.go index ff9b88312..bc9339cae 100644 --- a/libs/jsonschema/utils.go +++ b/libs/jsonschema/utils.go @@ -150,7 +150,7 @@ func (e patternMatchError) Error() string { // If custom user error message is defined, return error with the custom message msg := e.FailureMessage if msg == "" { - msg = fmt.Sprintf("Expected to match regex pattern: %s", e.Pattern) + msg = "Expected to match regex pattern: " + e.Pattern } return fmt.Sprintf("invalid value for %s: %q. %s", e.PropertyName, e.PropertyValue, msg) } diff --git a/libs/jsonschema/utils_test.go b/libs/jsonschema/utils_test.go index 89200dae3..954c723d3 100644 --- a/libs/jsonschema/utils_test.go +++ b/libs/jsonschema/utils_test.go @@ -96,7 +96,7 @@ func TestTemplateFromString(t *testing.T) { v, err = fromString("1.1", NumberType) assert.NoError(t, err) // Floating point conversions are not perfect - assert.True(t, (v.(float64)-1.1) < 0.000001) + assert.Less(t, (v.(float64) - 1.1), 0.000001) v, err = fromString("12345", IntegerType) assert.NoError(t, err) @@ -104,7 +104,7 @@ func TestTemplateFromString(t *testing.T) { v, err = fromString("123", NumberType) assert.NoError(t, err) - assert.Equal(t, float64(123), v) + assert.InDelta(t, float64(123), v.(float64), 0.0001) _, err = fromString("qrt", ArrayType) assert.EqualError(t, err, "cannot parse string as object of type array. Value of string: \"qrt\"") diff --git a/libs/locker/locker.go b/libs/locker/locker.go index eb59c9f74..aadc50b58 100644 --- a/libs/locker/locker.go +++ b/libs/locker/locker.go @@ -116,14 +116,14 @@ func (locker *Locker) assertLockHeld(ctx context.Context) error { // idempotent function since overwrite is set to true func (locker *Locker) Write(ctx context.Context, pathToFile string, content []byte) error { if !locker.Active { - return fmt.Errorf("failed to put file. deploy lock not held") + return errors.New("failed to put file. deploy lock not held") } return locker.filer.Write(ctx, pathToFile, bytes.NewReader(content), filer.OverwriteIfExists, filer.CreateParentDirectories) } func (locker *Locker) Read(ctx context.Context, path string) (io.ReadCloser, error) { if !locker.Active { - return nil, fmt.Errorf("failed to get file. deploy lock not held") + return nil, errors.New("failed to get file. 
deploy lock not held") } return locker.filer.Read(ctx, path) } @@ -173,7 +173,7 @@ func (locker *Locker) Lock(ctx context.Context, isForced bool) error { func (locker *Locker) Unlock(ctx context.Context, opts ...UnlockOption) error { if !locker.Active { - return fmt.Errorf("unlock called when lock is not held") + return errors.New("unlock called when lock is not held") } // if allowLockFileNotExist is set, do not throw an error if the lock file does diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 40c850945..579cc1de3 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -47,7 +47,7 @@ func (f file) close() error { func (f file) readHeader() (string, error) { // Scan header line with some padding. buf := make([]byte, headerLength) - n, err := f.f.Read([]byte(buf)) + n, err := f.f.Read(buf) if err != nil && err != io.EOF { return "", err } diff --git a/libs/notebook/detect_test.go b/libs/notebook/detect_test.go index 4ede7bf9b..49a67d2d3 100644 --- a/libs/notebook/detect_test.go +++ b/libs/notebook/detect_test.go @@ -1,7 +1,6 @@ package notebook import ( - "errors" "io/fs" "os" "path/filepath" @@ -53,7 +52,7 @@ func TestDetectCallsDetectJupyter(t *testing.T) { func TestDetectUnknownExtension(t *testing.T) { _, _, err := Detect("./testdata/doesntexist.foobar") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) nb, _, err := Detect("./testdata/unknown_extension.foobar") require.NoError(t, err) @@ -62,7 +61,7 @@ func TestDetectUnknownExtension(t *testing.T) { func TestDetectNoExtension(t *testing.T) { _, _, err := Detect("./testdata/doesntexist") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) nb, _, err := Detect("./testdata/no_extension") require.NoError(t, err) diff --git a/libs/notebook/testdata/.ruff.toml b/libs/notebook/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/notebook/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/process/background_test.go b/libs/process/background_test.go index 2e47e814b..5cc810f5d 100644 --- a/libs/process/background_test.go +++ b/libs/process/background_test.go @@ -4,7 +4,7 @@ import ( "bufio" "bytes" "context" - "fmt" + "errors" "os/exec" "strings" "testing" @@ -95,13 +95,13 @@ func TestBackgroundNoStdin(t *testing.T) { func TestBackgroundFails(t *testing.T) { ctx := context.Background() _, err := Background(ctx, []string{"ls", "/dev/null/x"}) - assert.NotNil(t, err) + assert.Error(t, err) } func TestBackgroundFailsOnOption(t *testing.T) { ctx := context.Background() _, err := Background(ctx, []string{"ls", "/dev/null/x"}, func(_ context.Context, c *exec.Cmd) error { - return fmt.Errorf("nope") + return errors.New("nope") }) assert.EqualError(t, err, "nope") } diff --git a/libs/process/forwarded_test.go b/libs/process/forwarded_test.go index ddb79818f..71f0a6a63 100644 --- a/libs/process/forwarded_test.go +++ b/libs/process/forwarded_test.go @@ -27,7 +27,7 @@ func TestForwardedFails(t *testing.T) { err := Forwarded(ctx, []string{ "_non_existent_", }, strings.NewReader("abc\n"), &buf, &buf) - assert.NotNil(t, err) + assert.Error(t, err) } func TestForwardedFailsOnStdinPipe(t *testing.T) { @@ -39,5 +39,5 @@ func TestForwardedFailsOnStdinPipe(t *testing.T) { c.Stdin = strings.NewReader("x") return nil }) - assert.NotNil(t, err) + assert.Error(t, err) } diff --git a/libs/process/opts_test.go b/libs/process/opts_test.go index 3a819fbb9..8b5d51928 100644 --- 
a/libs/process/opts_test.go +++ b/libs/process/opts_test.go @@ -41,7 +41,7 @@ func TestWorksWithLibsEnv(t *testing.T) { vars := cmd.Environ() sort.Strings(vars) - assert.True(t, len(vars) >= 2) + assert.GreaterOrEqual(t, len(vars), 2) assert.Equal(t, "CCC=DDD", vars[0]) assert.Equal(t, "EEE=FFF", vars[1]) } diff --git a/libs/process/stub.go b/libs/process/stub.go index 8ab6fd705..528489098 100644 --- a/libs/process/stub.go +++ b/libs/process/stub.go @@ -168,7 +168,7 @@ func (s *processStub) run(cmd *exec.Cmd) error { } var zeroStub reponseStub if s.reponseStub == zeroStub { - return fmt.Errorf("no default process stub") + return errors.New("no default process stub") } err := s.reponseStub.err if s.reponseStub.stdout != "" { diff --git a/libs/process/stub_test.go b/libs/process/stub_test.go index 81afa3a89..158e8b3a6 100644 --- a/libs/process/stub_test.go +++ b/libs/process/stub_test.go @@ -2,7 +2,7 @@ package process_test import ( "context" - "fmt" + "errors" "os/exec" "testing" @@ -32,7 +32,7 @@ func TestStubOutput(t *testing.T) { func TestStubFailure(t *testing.T) { ctx := context.Background() ctx, stub := process.WithStub(ctx) - stub.WithFailure(fmt.Errorf("nope")) + stub.WithFailure(errors.New("nope")) _, err := process.Background(ctx, []string{"/bin/meeecho", "1"}) require.EqualError(t, err, "/bin/meeecho 1: nope") @@ -51,7 +51,7 @@ func TestStubCallback(t *testing.T) { if err != nil { return err } - return fmt.Errorf("yep") + return errors.New("yep") }) _, err := process.Background(ctx, []string{"/bin/meeecho", "1"}) @@ -70,7 +70,7 @@ func TestStubResponses(t *testing.T) { stub. WithStdoutFor("qux 1", "first"). WithStdoutFor("qux 2", "second"). - WithFailureFor("qux 3", fmt.Errorf("nope")) + WithFailureFor("qux 3", errors.New("nope")) first, err := process.Background(ctx, []string{"/path/is/irrelevant/qux", "1"}) require.NoError(t, err) diff --git a/libs/python/interpreters_unix_test.go b/libs/python/interpreters_unix_test.go index 8471644a1..57adc9279 100644 --- a/libs/python/interpreters_unix_test.go +++ b/libs/python/interpreters_unix_test.go @@ -18,7 +18,7 @@ func TestAtLeastOnePythonInstalled(t *testing.T) { assert.NoError(t, err) a := all.Latest() t.Logf("latest is: %s", a) - assert.True(t, len(all) > 0) + assert.NotEmpty(t, all) } func TestNoInterpretersFound(t *testing.T) { diff --git a/libs/sync/event.go b/libs/sync/event.go index 05821a477..510a01954 100644 --- a/libs/sync/event.go +++ b/libs/sync/event.go @@ -52,10 +52,10 @@ func (e *EventChanges) IsEmpty() bool { func (e *EventChanges) String() string { var changes []string if len(e.Put) > 0 { - changes = append(changes, fmt.Sprintf("PUT: %s", strings.Join(e.Put, ", "))) + changes = append(changes, "PUT: "+strings.Join(e.Put, ", ")) } if len(e.Delete) > 0 { - changes = append(changes, fmt.Sprintf("DELETE: %s", strings.Join(e.Delete, ", "))) + changes = append(changes, "DELETE: "+strings.Join(e.Delete, ", ")) } return strings.Join(changes, ", ") } @@ -70,7 +70,7 @@ func (e *EventStart) String() string { return "" } - return fmt.Sprintf("Action: %s", e.EventChanges.String()) + return "Action: " + e.EventChanges.String() } func newEventStart(seq int, put, delete []string) Event { @@ -98,9 +98,9 @@ func (e *EventSyncProgress) String() string { switch e.Action { case EventActionPut: - return fmt.Sprintf("Uploaded %s", e.Path) + return "Uploaded " + e.Path case EventActionDelete: - return fmt.Sprintf("Deleted %s", e.Path) + return "Deleted " + e.Path default: panic("invalid action") } diff --git a/libs/sync/path.go 
b/libs/sync/path.go index 97a908965..87397be4b 100644 --- a/libs/sync/path.go +++ b/libs/sync/path.go @@ -14,7 +14,7 @@ import ( ) func repoPathForPath(me *iam.User, remotePath string) string { - base := path.Clean(fmt.Sprintf("/Repos/%s", me.UserName)) + base := path.Clean("/Repos/" + me.UserName) remotePath = path.Clean(remotePath) for strings.HasPrefix(path.Dir(remotePath), base) && path.Dir(remotePath) != base { remotePath = path.Dir(remotePath) diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index eef526e58..4ba3874ae 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -51,7 +51,7 @@ func TestDiff(t *testing.T) { assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) + assert.Empty(t, change.delete) assert.Len(t, change.put, 2) assert.Contains(t, change.put, "hello.txt") assert.Contains(t, change.put, "world.txt") @@ -67,7 +67,7 @@ func TestDiff(t *testing.T) { change, err = state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) + assert.Empty(t, change.delete) assert.Len(t, change.put, 1) assert.Contains(t, change.put, "world.txt") assertKeysOfMap(t, state.LastModifiedTimes, []string{"hello.txt", "world.txt"}) @@ -82,7 +82,7 @@ func TestDiff(t *testing.T) { change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) - assert.Len(t, change.put, 0) + assert.Empty(t, change.put) assert.Contains(t, change.delete, "hello.txt") assertKeysOfMap(t, state.LastModifiedTimes, []string{"world.txt"}) assert.Equal(t, map[string]string{"world.txt": "world.txt"}, state.LocalToRemoteNames) @@ -145,8 +145,8 @@ func TestFolderDiff(t *testing.T) { assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) - assert.Len(t, change.rmdir, 0) + assert.Empty(t, change.delete) + assert.Empty(t, change.rmdir) assert.Len(t, change.mkdir, 1) assert.Len(t, change.put, 1) assert.Contains(t, change.mkdir, "foo") @@ -159,8 +159,8 @@ func TestFolderDiff(t *testing.T) { assert.NoError(t, err) assert.Len(t, change.delete, 1) assert.Len(t, change.rmdir, 1) - assert.Len(t, change.mkdir, 0) - assert.Len(t, change.put, 0) + assert.Empty(t, change.mkdir) + assert.Empty(t, change.put) assert.Contains(t, change.delete, "foo/bar") assert.Contains(t, change.rmdir, "foo") } @@ -189,7 +189,7 @@ func TestPythonNotebookDiff(t *testing.T) { foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") change, err := state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) + assert.Empty(t, change.delete) assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo.py") assertKeysOfMap(t, state.LastModifiedTimes, []string{"foo.py"}) @@ -233,9 +233,9 @@ func TestPythonNotebookDiff(t *testing.T) { change, err = state.diff(ctx, files) assert.NoError(t, err) assert.Len(t, change.delete, 1) - assert.Len(t, change.put, 0) + assert.Empty(t, change.put) assert.Contains(t, change.delete, "foo") - assert.Len(t, state.LastModifiedTimes, 0) + assert.Empty(t, state.LastModifiedTimes) assert.Equal(t, map[string]string{}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{}, state.RemoteToLocalNames) } @@ -264,7 +264,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) + assert.Empty(t, change.delete) assert.Len(t, change.put, 2) assert.Contains(t, change.put, "foo.py") 
assert.Contains(t, change.put, "foo") @@ -300,7 +300,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) - assert.Len(t, change.delete, 0) + assert.Empty(t, change.delete) assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo.py") diff --git a/libs/sync/sync.go b/libs/sync/sync.go index dc2c8992a..f13fa934a 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -2,6 +2,7 @@ package sync import ( "context" + "errors" "fmt" stdsync "sync" "time" @@ -93,7 +94,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { // specify the workspace by its resource ID. tracked in: https://databricks.atlassian.net/browse/DECO-194 opts.Host = opts.WorkspaceClient.Config.Host if opts.Host == "" { - return nil, fmt.Errorf("failed to resolve host for snapshot") + return nil, errors.New("failed to resolve host for snapshot") } // For full sync, we start with an empty snapshot. diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index 6168dc217..f30431770 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -59,7 +59,7 @@ func TestGetFileSet(t *testing.T) { fileList, err := s.GetFileList(ctx) require.NoError(t, err) - require.Equal(t, len(fileList), 10) + require.Len(t, fileList, 10) inc, err = fileset.NewGlobSet(root, []string{}) require.NoError(t, err) @@ -77,7 +77,7 @@ func TestGetFileSet(t *testing.T) { fileList, err = s.GetFileList(ctx) require.NoError(t, err) - require.Equal(t, len(fileList), 2) + require.Len(t, fileList, 2) inc, err = fileset.NewGlobSet(root, []string{"./.databricks/*.go"}) require.NoError(t, err) @@ -95,7 +95,7 @@ func TestGetFileSet(t *testing.T) { fileList, err = s.GetFileList(ctx) require.NoError(t, err) - require.Equal(t, len(fileList), 11) + require.Len(t, fileList, 11) } func TestRecursiveExclude(t *testing.T) { @@ -125,7 +125,7 @@ func TestRecursiveExclude(t *testing.T) { fileList, err := s.GetFileList(ctx) require.NoError(t, err) - require.Equal(t, len(fileList), 7) + require.Len(t, fileList, 7) } func TestNegateExclude(t *testing.T) { @@ -155,6 +155,6 @@ func TestNegateExclude(t *testing.T) { fileList, err := s.GetFileList(ctx) require.NoError(t, err) - require.Equal(t, len(fileList), 1) - require.Equal(t, fileList[0].Relative, "test/sub1/sub2/h.txt") + require.Len(t, fileList, 1) + require.Equal(t, "test/sub1/sub2/h.txt", fileList[0].Relative) } diff --git a/libs/sync/testdata/.ruff.toml b/libs/sync/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/sync/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/tags/tag.go b/libs/tags/tag.go index 4e9b329ca..64eab947e 100644 --- a/libs/tags/tag.go +++ b/libs/tags/tag.go @@ -1,6 +1,7 @@ package tags import ( + "errors" "fmt" "regexp" "strings" @@ -21,13 +22,13 @@ type tag struct { func (t *tag) ValidateKey(s string) error { if len(s) == 0 { - return fmt.Errorf("key must not be empty") + return errors.New("key must not be empty") } if len(s) > t.keyLength { return fmt.Errorf("key length %d exceeds maximum of %d", len(s), t.keyLength) } if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) { - return fmt.Errorf("key contains non-latin1 characters") + return errors.New("key contains non-latin1 characters") } if !t.keyPattern.MatchString(s) { return fmt.Errorf("key %q does not match pattern %q", s, t.keyPattern) @@ -40,7 +41,7 @@ func (t *tag) ValidateValue(s string) error { return 
fmt.Errorf("value length %d exceeds maximum of %d", len(s), t.valueLength) } if strings.ContainsFunc(s, func(r rune) bool { return !unicode.Is(latin1, r) }) { - return fmt.Errorf("value contains non-latin1 characters") + return errors.New("value contains non-latin1 characters") } if !t.valuePattern.MatchString(s) { return fmt.Errorf("value %q does not match pattern %q", s, t.valuePattern) diff --git a/libs/template/builtin.go b/libs/template/builtin.go index dcb3a8858..5b10534ef 100644 --- a/libs/template/builtin.go +++ b/libs/template/builtin.go @@ -8,14 +8,14 @@ import ( //go:embed all:templates var builtinTemplates embed.FS -// BuiltinTemplate represents a template that is built into the CLI. -type BuiltinTemplate struct { +// builtinTemplate represents a template that is built into the CLI. +type builtinTemplate struct { Name string FS fs.FS } -// Builtin returns the list of all built-in templates. -func Builtin() ([]BuiltinTemplate, error) { +// builtin returns the list of all built-in templates. +func builtin() ([]builtinTemplate, error) { templates, err := fs.Sub(builtinTemplates, "templates") if err != nil { return nil, err @@ -26,7 +26,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - var out []BuiltinTemplate + var out []builtinTemplate for _, entry := range entries { if !entry.IsDir() { continue @@ -37,7 +37,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - out = append(out, BuiltinTemplate{ + out = append(out, builtinTemplate{ Name: entry.Name(), FS: templateFS, }) diff --git a/libs/template/builtin_test.go b/libs/template/builtin_test.go index 504e0acca..162a227ea 100644 --- a/libs/template/builtin_test.go +++ b/libs/template/builtin_test.go @@ -9,20 +9,26 @@ import ( ) func TestBuiltin(t *testing.T) { - out, err := Builtin() + out, err := builtin() require.NoError(t, err) - assert.Len(t, out, 3) + assert.GreaterOrEqual(t, len(out), 3) - // Confirm names. - assert.Equal(t, "dbt-sql", out[0].Name) - assert.Equal(t, "default-python", out[1].Name) - assert.Equal(t, "default-sql", out[2].Name) + // Create a map of templates by name for easier lookup + templates := make(map[string]*builtinTemplate) + for _, tmpl := range out { + templates[tmpl.Name] = &tmpl + } - // Confirm that the filesystems work. 
- _, err = fs.Stat(out[0].FS, `template/{{.project_name}}/dbt_project.yml.tmpl`) + // Verify all expected templates exist + assert.Contains(t, templates, "dbt-sql") + assert.Contains(t, templates, "default-python") + assert.Contains(t, templates, "default-sql") + + // Verify the filesystems work for each template + _, err = fs.Stat(templates["dbt-sql"].FS, `template/{{.project_name}}/dbt_project.yml.tmpl`) assert.NoError(t, err) - _, err = fs.Stat(out[1].FS, `template/{{.project_name}}/tests/main_test.py.tmpl`) + _, err = fs.Stat(templates["default-python"].FS, `template/{{.project_name}}/tests/main_test.py.tmpl`) assert.NoError(t, err) - _, err = fs.Stat(out[2].FS, `template/{{.project_name}}/src/orders_daily.sql.tmpl`) + _, err = fs.Stat(templates["default-sql"].FS, `template/{{.project_name}}/src/orders_daily.sql.tmpl`) assert.NoError(t, err) } diff --git a/libs/template/config.go b/libs/template/config.go index 8e7695b91..919ba2250 100644 --- a/libs/template/config.go +++ b/libs/template/config.go @@ -189,7 +189,7 @@ func (c *config) promptOnce(property *jsonschema.Schema, name, defaultVal, descr c.values[name], err = property.ParseString(userInput) if err != nil { // Show error and retry if validation fails - cmdio.LogString(c.ctx, fmt.Sprintf("Validation failed: %s", err.Error())) + cmdio.LogString(c.ctx, "Validation failed: "+err.Error()) return retriableError{err: err} } @@ -197,7 +197,7 @@ func (c *config) promptOnce(property *jsonschema.Schema, name, defaultVal, descr err = c.schema.ValidateInstance(c.values) if err != nil { // Show error and retry if validation fails - cmdio.LogString(c.ctx, fmt.Sprintf("Validation failed: %s", err.Error())) + cmdio.LogString(c.ctx, "Validation failed: "+err.Error()) return retriableError{err: err} } return nil diff --git a/libs/template/config_test.go b/libs/template/config_test.go index a855019b6..515a0b9f5 100644 --- a/libs/template/config_test.go +++ b/libs/template/config_test.go @@ -24,7 +24,7 @@ func TestTemplateConfigAssignValuesFromFile(t *testing.T) { err = c.assignValuesFromFile(filepath.Join(testDir, "config.json")) if assert.NoError(t, err) { assert.Equal(t, int64(1), c.values["int_val"]) - assert.Equal(t, float64(2), c.values["float_val"]) + assert.InDelta(t, float64(2), c.values["float_val"].(float64), 0.0001) assert.Equal(t, true, c.values["bool_val"]) assert.Equal(t, "hello", c.values["string_val"]) } @@ -44,7 +44,7 @@ func TestTemplateConfigAssignValuesFromFileDoesNotOverwriteExistingConfigs(t *te err = c.assignValuesFromFile(filepath.Join(testDir, "config.json")) if assert.NoError(t, err) { assert.Equal(t, int64(1), c.values["int_val"]) - assert.Equal(t, float64(2), c.values["float_val"]) + assert.InDelta(t, float64(2), c.values["float_val"].(float64), 0.0001) assert.Equal(t, true, c.values["bool_val"]) assert.Equal(t, "this-is-not-overwritten", c.values["string_val"]) } @@ -89,7 +89,7 @@ func TestTemplateConfigAssignValuesFromDefaultValues(t *testing.T) { err = c.assignDefaultValues(r) if assert.NoError(t, err) { assert.Equal(t, int64(123), c.values["int_val"]) - assert.Equal(t, float64(123), c.values["float_val"]) + assert.InDelta(t, float64(123), c.values["float_val"].(float64), 0.0001) assert.Equal(t, true, c.values["bool_val"]) assert.Equal(t, "hello", c.values["string_val"]) } @@ -110,7 +110,7 @@ func TestTemplateConfigAssignValuesFromTemplatedDefaultValues(t *testing.T) { err = c.assignDefaultValues(r) if assert.NoError(t, err) { assert.Equal(t, int64(123), c.values["int_val"]) - assert.Equal(t, float64(123), 
c.values["float_val"]) + assert.InDelta(t, float64(123), c.values["float_val"].(float64), 0.0001) assert.Equal(t, true, c.values["bool_val"]) assert.Equal(t, "world", c.values["string_val"]) } diff --git a/libs/template/file_test.go b/libs/template/file_test.go index ced38c284..f4bf5652c 100644 --- a/libs/template/file_test.go +++ b/libs/template/file_test.go @@ -8,6 +8,7 @@ import ( "runtime" "testing" + "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/filer" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -27,8 +28,8 @@ func testInMemoryFile(t *testing.T, ctx context.Context, perm fs.FileMode) { err = f.Write(ctx, out) assert.NoError(t, err) - assertFileContent(t, filepath.Join(tmpDir, "a/b/c"), "123") - assertFilePermissions(t, filepath.Join(tmpDir, "a/b/c"), perm) + testutil.AssertFileContents(t, filepath.Join(tmpDir, "a/b/c"), "123") + testutil.AssertFilePermissions(t, filepath.Join(tmpDir, "a/b/c"), perm) } func testCopyFile(t *testing.T, ctx context.Context, perm fs.FileMode) { @@ -48,8 +49,8 @@ func testCopyFile(t *testing.T, ctx context.Context, perm fs.FileMode) { err = f.Write(ctx, out) assert.NoError(t, err) - assertFileContent(t, filepath.Join(tmpDir, "a/b/c"), "qwerty") - assertFilePermissions(t, filepath.Join(tmpDir, "a/b/c"), perm) + testutil.AssertFileContents(t, filepath.Join(tmpDir, "source"), "qwerty") + testutil.AssertFilePermissions(t, filepath.Join(tmpDir, "source"), perm) } func TestTemplateInMemoryFilePersistToDisk(t *testing.T) { diff --git a/libs/template/helpers_test.go b/libs/template/helpers_test.go index d98f40b24..f8bc1f3da 100644 --- a/libs/template/helpers_test.go +++ b/libs/template/helpers_test.go @@ -86,7 +86,7 @@ func TestTemplateRandIntFunction(t *testing.T) { assert.Len(t, r.files, 1) randInt, err := strconv.Atoi(strings.TrimSpace(string(r.files[0].(*inMemoryFile).content))) assert.Less(t, randInt, 10) - assert.Empty(t, err) + assert.NoError(t, err) } func TestTemplateUuidFunction(t *testing.T) { diff --git a/libs/template/materialize.go b/libs/template/materialize.go deleted file mode 100644 index 86a6a8c37..000000000 --- a/libs/template/materialize.go +++ /dev/null @@ -1,94 +0,0 @@ -package template - -import ( - "context" - "errors" - "fmt" - "io/fs" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/filer" -) - -const ( - libraryDirName = "library" - templateDirName = "template" - schemaFileName = "databricks_template_schema.json" -) - -// This function materializes the input templates as a project, using user defined -// configurations. -// Parameters: -// -// ctx: context containing a cmdio object. 
This is used to prompt the user -// configFilePath: file path containing user defined config values -// templateFS: root of the template definition -// outputFiler: filer to use for writing the initialized template -func Materialize(ctx context.Context, configFilePath string, templateFS fs.FS, outputFiler filer.Filer) error { - if _, err := fs.Stat(templateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) { - return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) - } - - config, err := newConfig(ctx, templateFS, schemaFileName) - if err != nil { - return err - } - - // Read and assign config values from file - if configFilePath != "" { - err = config.assignValuesFromFile(configFilePath) - if err != nil { - return err - } - } - - helpers := loadHelpers(ctx) - r, err := newRenderer(ctx, config.values, helpers, templateFS, templateDirName, libraryDirName) - if err != nil { - return err - } - - // Print welcome message - welcome := config.schema.WelcomeMessage - if welcome != "" { - welcome, err = r.executeTemplate(welcome) - if err != nil { - return err - } - cmdio.LogString(ctx, welcome) - } - - // Prompt user for any missing config values. Assign default values if - // terminal is not TTY - err = config.promptOrAssignDefaultValues(r) - if err != nil { - return err - } - err = config.validate() - if err != nil { - return err - } - - // Walk and render the template, since input configuration is complete - err = r.walk() - if err != nil { - return err - } - - err = r.persistToDisk(ctx, outputFiler) - if err != nil { - return err - } - - success := config.schema.SuccessMessage - if success == "" { - cmdio.LogString(ctx, "✨ Successfully initialized template") - } else { - success, err = r.executeTemplate(success) - if err != nil { - return err - } - cmdio.LogString(ctx, success) - } - return nil -} diff --git a/libs/template/materialize_test.go b/libs/template/materialize_test.go deleted file mode 100644 index f7cd916e3..000000000 --- a/libs/template/materialize_test.go +++ /dev/null @@ -1,24 +0,0 @@ -package template - -import ( - "context" - "fmt" - "os" - "testing" - - "github.com/databricks/cli/cmd/root" - "github.com/databricks/databricks-sdk-go" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMaterializeForNonTemplateDirectory(t *testing.T) { - tmpDir := t.TempDir() - w, err := databricks.NewWorkspaceClient(&databricks.Config{}) - require.NoError(t, err) - ctx := root.SetWorkspaceClient(context.Background(), w) - - // Try to materialize a non-template directory. - err = Materialize(ctx, "", os.DirFS(tmpDir), nil) - assert.EqualError(t, err, fmt.Sprintf("not a bundle template: expected to find a template schema file at %s", schemaFileName)) -} diff --git a/libs/template/reader.go b/libs/template/reader.go new file mode 100644 index 000000000..8e32a75cf --- /dev/null +++ b/libs/template/reader.go @@ -0,0 +1,119 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" +) + +type Reader interface { + // FS returns a file system that contains the template + // definition files. + FS(ctx context.Context) (fs.FS, error) + + // Cleanup releases any resources associated with the reader + // like cleaning up temporary directories. 
+ Cleanup(ctx context.Context) +} + +type builtinReader struct { + name string +} + +func (r *builtinReader) FS(ctx context.Context) (fs.FS, error) { + builtin, err := builtin() + if err != nil { + return nil, err + } + + for _, entry := range builtin { + if entry.Name == r.name { + return entry.FS, nil + } + } + + return nil, fmt.Errorf("builtin template %s not found", r.name) +} + +func (r *builtinReader) Cleanup(ctx context.Context) {} + +type gitReader struct { + gitUrl string + // tag or branch to checkout + ref string + // subdirectory within the repository that contains the template + templateDir string + // temporary directory where the repository is cloned + tmpRepoDir string + + // Function to clone the repository. This is a function pointer to allow + // mocking in tests. + cloneFunc func(ctx context.Context, url, reference, targetPath string) error +} + +// Computes the repo name from the repo URL. Treats the last non empty word +// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git +// the name would be "cli.git" +func repoName(url string) string { + parts := strings.Split(strings.TrimRight(url, "/"), "/") + return parts[len(parts)-1] +} + +func (r *gitReader) FS(ctx context.Context) (fs.FS, error) { + // Calling FS twice will lead to two downloaded copies of the git repo. + // In the future if you need to call FS twice, consider adding some caching + // logic here to avoid multiple downloads. + if r.tmpRepoDir != "" { + return nil, errors.New("FS called twice on git reader") + } + + // Create a temporary directory with the name of the repository. The '*' + // character is replaced by a random string in the generated temporary directory. + repoDir, err := os.MkdirTemp("", repoName(r.gitUrl)+"-*") + if err != nil { + return nil, err + } + r.tmpRepoDir = repoDir + + // start the spinner + promptSpinner := cmdio.Spinner(ctx) + promptSpinner <- "Downloading the template\n" + + err = r.cloneFunc(ctx, r.gitUrl, r.ref, repoDir) + close(promptSpinner) + if err != nil { + return nil, err + } + + return os.DirFS(filepath.Join(repoDir, r.templateDir)), nil +} + +func (r *gitReader) Cleanup(ctx context.Context) { + if r.tmpRepoDir == "" { + return + } + + // Cleanup is best effort. Only log errors. 
+ err := os.RemoveAll(r.tmpRepoDir) + if err != nil { + log.Debugf(ctx, "Error cleaning up tmp directory %s for git template reader for URL %s: %s", r.tmpRepoDir, r.gitUrl, err) + } +} + +type localReader struct { + // Path on the local filesystem that contains the template + path string +} + +func (r *localReader) FS(ctx context.Context) (fs.FS, error) { + return os.DirFS(r.path), nil +} + +func (r *localReader) Cleanup(ctx context.Context) {} diff --git a/libs/template/reader_test.go b/libs/template/reader_test.go new file mode 100644 index 000000000..89c8d9fdf --- /dev/null +++ b/libs/template/reader_test.go @@ -0,0 +1,101 @@ +package template + +import ( + "context" + "io/fs" + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuiltInReader(t *testing.T) { + exists := []string{ + "default-python", + "default-sql", + "dbt-sql", + "experimental-jobs-as-code", + } + + for _, name := range exists { + t.Run(name, func(t *testing.T) { + r := &builtinReader{name: name} + fsys, err := r.FS(context.Background()) + assert.NoError(t, err) + assert.NotNil(t, fsys) + + // Assert file content returned is accurate and every template has a welcome + // message defined. + b, err := fs.ReadFile(fsys, "databricks_template_schema.json") + require.NoError(t, err) + assert.Contains(t, string(b), "welcome_message") + }) + } + + t.Run("doesnotexist", func(t *testing.T) { + r := &builtinReader{name: "doesnotexist"} + _, err := r.FS(context.Background()) + assert.EqualError(t, err, "builtin template doesnotexist not found") + }) +} + +func TestGitUrlReader(t *testing.T) { + ctx := cmdio.MockDiscard(context.Background()) + + var args []string + numCalls := 0 + cloneFunc := func(ctx context.Context, url, reference, targetPath string) error { + numCalls++ + args = []string{url, reference, targetPath} + testutil.WriteFile(t, filepath.Join(targetPath, "a", "b", "c", "somefile"), "somecontent") + return nil + } + r := &gitReader{ + gitUrl: "someurl", + cloneFunc: cloneFunc, + ref: "sometag", + templateDir: "a/b/c", + } + + // Assert cloneFunc is called with the correct args. + fsys, err := r.FS(ctx) + require.NoError(t, err) + require.NotEmpty(t, r.tmpRepoDir) + assert.Equal(t, 1, numCalls) + assert.DirExists(t, r.tmpRepoDir) + assert.Equal(t, []string{"someurl", "sometag", r.tmpRepoDir}, args) + + // Assert the fs returned is rooted at the templateDir. + b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) + + // Assert second call to FS returns an error. + _, err = r.FS(ctx) + assert.ErrorContains(t, err, "FS called twice on git reader") + + // Assert the downloaded repository is cleaned up. + _, err = fs.Stat(fsys, ".") + require.NoError(t, err) + r.Cleanup(ctx) + _, err = fs.Stat(fsys, ".") + assert.ErrorIs(t, err, fs.ErrNotExist) +} + +func TestLocalReader(t *testing.T) { + tmpDir := t.TempDir() + testutil.WriteFile(t, filepath.Join(tmpDir, "somefile"), "somecontent") + ctx := context.Background() + + r := &localReader{path: tmpDir} + fsys, err := r.FS(ctx) + require.NoError(t, err) + + // Assert the fs returned is rooted at correct location. 
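The reader tests above all follow the same call pattern: obtain the template filesystem with `FS` and release any resources with `Cleanup`. A hedged sketch of how a caller might drive any `Reader` implementation; `readSchema` is an illustrative helper, not the CLI's actual call site:

```go
package example

import (
	"context"
	"io/fs"
)

// Reader mirrors the interface introduced in libs/template/reader.go.
type Reader interface {
	FS(ctx context.Context) (fs.FS, error)
	Cleanup(ctx context.Context)
}

// readSchema is a hypothetical caller: it resolves the template filesystem,
// defers cleanup (important for gitReader, which clones into a temporary
// directory), and reads a single file from the template root.
func readSchema(ctx context.Context, r Reader) ([]byte, error) {
	fsys, err := r.FS(ctx)
	if err != nil {
		return nil, err
	}
	defer r.Cleanup(ctx)

	return fs.ReadFile(fsys, "databricks_template_schema.json")
}
```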
+ b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) +} diff --git a/libs/template/renderer.go b/libs/template/renderer.go index 5030cd9df..679b7d8b7 100644 --- a/libs/template/renderer.go +++ b/libs/template/renderer.go @@ -150,6 +150,10 @@ func (r *renderer) computeFile(relPathTemplate string) (file, error) { } perm := info.Mode().Perm() + // Always include the write bit for the owner of the file. + // It does not make sense to have a file that is not writable by the owner. + perm |= 0o200 + // Execute relative path template to get destination path for the file relPath, err := r.executeTemplate(relPathTemplate) if err != nil { diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index eeb308732..b2ec388bd 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -2,7 +2,6 @@ package template import ( "context" - "fmt" "io/fs" "os" "path" @@ -17,6 +16,7 @@ import ( "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/tags" @@ -27,20 +27,23 @@ import ( "github.com/stretchr/testify/require" ) -func assertFileContent(t *testing.T, path, content string) { - b, err := os.ReadFile(path) - require.NoError(t, err) - assert.Equal(t, content, string(b)) -} +var ( + defaultFilePermissions fs.FileMode + defaultDirPermissions fs.FileMode +) -func assertFilePermissions(t *testing.T, path string, perm fs.FileMode) { - info, err := os.Stat(path) - require.NoError(t, err) - assert.Equal(t, perm, info.Mode().Perm()) +func init() { + if runtime.GOOS == "windows" { + defaultFilePermissions = fs.FileMode(0o666) + defaultDirPermissions = fs.FileMode(0o777) + } else { + defaultFilePermissions = fs.FileMode(0o644) + defaultDirPermissions = fs.FileMode(0o755) + } } func assertBuiltinTemplateValid(t *testing.T, template string, settings map[string]any, target string, isServicePrincipal, build bool, tempDir string) { - ctx := context.Background() + ctx := dbr.MockRuntime(context.Background(), false) templateFS, err := fs.Sub(builtinTemplates, path.Join("templates", template)) require.NoError(t, err) @@ -69,6 +72,10 @@ func assertBuiltinTemplateValid(t *testing.T, template string, settings map[stri err = renderer.persistToDisk(ctx, out) require.NoError(t, err) + // Verify permissions on file and directory + testutil.AssertFilePermissions(t, filepath.Join(tempDir, "my_project/README.md"), defaultFilePermissions) + testutil.AssertDirPermissions(t, filepath.Join(tempDir, "my_project/resources"), defaultDirPermissions) + b, err := bundle.Load(ctx, filepath.Join(tempDir, "my_project")) require.NoError(t, err) diags := bundle.Apply(ctx, b, phases.LoadNamedTarget(target)) @@ -347,10 +354,10 @@ func TestRendererPersistToDisk(t *testing.T) { assert.NoFileExists(t, filepath.Join(tmpDir, "a", "b", "c")) assert.NoFileExists(t, filepath.Join(tmpDir, "mno")) - assertFileContent(t, filepath.Join(tmpDir, "a", "b", "d"), "123") - assertFilePermissions(t, filepath.Join(tmpDir, "a", "b", "d"), 0o444) - assertFileContent(t, filepath.Join(tmpDir, "mmnn"), "456") - assertFilePermissions(t, filepath.Join(tmpDir, "mmnn"), 0o444) + testutil.AssertFileContents(t, filepath.Join(tmpDir, "a/b/d"), "123") + testutil.AssertFilePermissions(t, filepath.Join(tmpDir, "a/b/d"), fs.FileMode(0o444)) + 
testutil.AssertFileContents(t, filepath.Join(tmpDir, "mmnn"), "456") + testutil.AssertFilePermissions(t, filepath.Join(tmpDir, "mmnn"), fs.FileMode(0o444)) } func TestRendererWalk(t *testing.T) { @@ -434,7 +441,7 @@ func TestRendererSkipAllFilesInCurrentDirectory(t *testing.T) { entries, err := os.ReadDir(tmpDir) require.NoError(t, err) // Assert none of the files are persisted to disk, because of {{skip "*"}} - assert.Len(t, entries, 0) + assert.Empty(t, entries) } func TestRendererSkipPatternsAreRelativeToFileDirectory(t *testing.T) { @@ -544,7 +551,7 @@ func TestRendererErrorOnConflictingFile(t *testing.T) { out, err := filer.NewLocalClient(tmpDir) require.NoError(t, err) err = r.persistToDisk(ctx, out) - assert.EqualError(t, err, fmt.Sprintf("failed to initialize template, one or more files already exist: %s", "a")) + assert.EqualError(t, err, "failed to initialize template, one or more files already exist: "+"a") } func TestRendererNoErrorOnConflictingFileIfSkipped(t *testing.T) { @@ -588,8 +595,8 @@ func TestRendererNonTemplatesAreCreatedAsCopyFiles(t *testing.T) { assert.NoError(t, err) assert.Len(t, r.files, 1) - assert.Equal(t, r.files[0].(*copyFile).srcPath, "not-a-template") - assert.Equal(t, r.files[0].RelPath(), "not-a-template") + assert.Equal(t, "not-a-template", r.files[0].(*copyFile).srcPath) + assert.Equal(t, "not-a-template", r.files[0].RelPath()) } func TestRendererFileTreeRendering(t *testing.T) { @@ -609,7 +616,7 @@ func TestRendererFileTreeRendering(t *testing.T) { // Assert in memory representation is created. assert.Len(t, r.files, 1) - assert.Equal(t, r.files[0].RelPath(), "my_directory/my_file") + assert.Equal(t, "my_directory/my_file", r.files[0].RelPath()) out, err := filer.NewLocalClient(tmpDir) require.NoError(t, err) @@ -617,8 +624,8 @@ func TestRendererFileTreeRendering(t *testing.T) { require.NoError(t, err) // Assert files and directories are correctly materialized. - assert.DirExists(t, filepath.Join(tmpDir, "my_directory")) - assert.FileExists(t, filepath.Join(tmpDir, "my_directory", "my_file")) + testutil.AssertDirPermissions(t, filepath.Join(tmpDir, "my_directory"), defaultDirPermissions) + testutil.AssertFilePermissions(t, filepath.Join(tmpDir, "my_directory", "my_file"), defaultFilePermissions) } func TestRendererSubTemplateInPath(t *testing.T) { diff --git a/libs/template/resolver.go b/libs/template/resolver.go new file mode 100644 index 000000000..2cc8bf1c7 --- /dev/null +++ b/libs/template/resolver.go @@ -0,0 +1,122 @@ +package template + +import ( + "context" + "errors" + "strings" + + "github.com/databricks/cli/libs/git" +) + +var gitUrlPrefixes = []string{ + "https://", + "git@", +} + +func isRepoUrl(url string) bool { + result := false + for _, prefix := range gitUrlPrefixes { + if strings.HasPrefix(url, prefix) { + result = true + break + } + } + return result +} + +type Resolver struct { + // One of the following three: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // 3. Name of a built-in template. + TemplatePathOrUrl string + + // Path to a JSON file containing the configuration values to be used for + // template initialization. + ConfigFile string + + // Directory to write the initialized template to. + OutputDir string + + // Directory path within a Git repository containing the template. + TemplateDir string + + // Git tag or branch to download the template from. Only one of these can be + // specified. 
+ Tag string + Branch string +} + +// ErrCustomSelected is returned when the user selects the "custom..." option +// in the prompt UI when they run `databricks bundle init`. This error signals +// the upstream callsite to show documentation to the user on how to use a custom +// template. +var ErrCustomSelected = errors.New("custom template selected") + +// Configures the reader and the writer for template and returns +// a handle to the template. +// Prompts the user if needed. +func (r Resolver) Resolve(ctx context.Context) (*Template, error) { + if r.Tag != "" && r.Branch != "" { + return nil, errors.New("only one of tag or branch can be specified") + } + + // Git ref to use for template initialization + ref := r.Branch + if r.Tag != "" { + ref = r.Tag + } + + var err error + var templateName TemplateName + + if r.TemplatePathOrUrl == "" { + // Prompt the user to select a template + // if a template path or URL is not provided. + templateName, err = SelectTemplate(ctx) + if err != nil { + return nil, err + } + } else { + templateName = TemplateName(r.TemplatePathOrUrl) + } + + tmpl := GetDatabricksTemplate(templateName) + + // If we could not find a databricks template with the name provided by the user, + // then we assume that the user provided us with a reference to a custom template. + // + // This reference could be one of: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // + // We resolve the appropriate reader according to the reference provided by the user. + if tmpl == nil { + tmpl = &Template{ + name: Custom, + // We use a writer that does not log verbose telemetry for custom templates. + // This is important because template definitions can contain PII that we + // do not want to centralize. + Writer: &defaultWriter{}, + } + + if isRepoUrl(r.TemplatePathOrUrl) { + tmpl.Reader = &gitReader{ + gitUrl: r.TemplatePathOrUrl, + ref: ref, + templateDir: r.TemplateDir, + cloneFunc: git.Clone, + } + } else { + tmpl.Reader = &localReader{ + path: r.TemplatePathOrUrl, + } + } + } + err = tmpl.Writer.Configure(ctx, r.ConfigFile, r.OutputDir) + if err != nil { + return nil, err + } + + return tmpl, nil +} diff --git a/libs/template/resolver_test.go b/libs/template/resolver_test.go new file mode 100644 index 000000000..1dee1c45f --- /dev/null +++ b/libs/template/resolver_test.go @@ -0,0 +1,110 @@ +package template + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTemplateResolverBothTagAndBranch(t *testing.T) { + r := Resolver{ + Tag: "tag", + Branch: "branch", + } + + _, err := r.Resolve(context.Background()) + assert.EqualError(t, err, "only one of tag or branch can be specified") +} + +func TestTemplateResolverErrorsWhenPromptingIsNotSupported(t *testing.T) { + r := Resolver{} + ctx := cmdio.MockDiscard(context.Background()) + + _, err := r.Resolve(ctx) + assert.EqualError(t, err, "prompting is not supported. 
Please specify the path, name or URL of the template to use") +} + +func TestTemplateResolverForDefaultTemplates(t *testing.T) { + for _, name := range []string{ + "default-python", + "default-sql", + "dbt-sql", + } { + t.Run(name, func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: name, + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, &builtinReader{name: name}, tmpl.Reader) + assert.IsType(t, &writerWithFullTelemetry{}, tmpl.Writer) + }) + } + + t.Run("mlops-stacks", func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "mlops-stacks", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + // Assert reader and writer configuration + assert.Equal(t, "https://github.com/databricks/mlops-stacks", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "/config/file", tmpl.Writer.(*writerWithFullTelemetry).configPath) + }) +} + +func TestTemplateResolverForCustomUrl(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "https://www.example.com/abc", + Tag: "tag", + TemplateDir: "/template/dir", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "https://www.example.com/abc", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "tag", tmpl.Reader.(*gitReader).ref) + assert.Equal(t, "/template/dir", tmpl.Reader.(*gitReader).templateDir) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestTemplateResolverForCustomPath(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "/custom/path", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "/custom/path", tmpl.Reader.(*localReader).path) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestBundleInitIsRepoUrl(t *testing.T) { + assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) + assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) + + assert.False(t, isRepoUrl("./local")) + assert.False(t, isRepoUrl("foo")) +} diff --git a/libs/template/template.go b/libs/template/template.go new file mode 100644 index 000000000..44834436b --- /dev/null +++ b/libs/template/template.go @@ -0,0 +1,140 @@ +package template + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/git" +) + +type Template struct { + Reader Reader + Writer Writer + + name TemplateName + description string + aliases []string + hidden bool +} + +type TemplateName string + +const ( + DefaultPython TemplateName = "default-python" + DefaultSql TemplateName = "default-sql" + DbtSql TemplateName = "dbt-sql" + MlopsStacks TemplateName = "mlops-stacks" + DefaultPydabs TemplateName = "default-pydabs" + Custom TemplateName = "custom" + ExperimentalJobsAsCode TemplateName = "experimental-jobs-as-code" +) + +var databricksTemplates = []Template{ + { + name: DefaultPython, + description: "The default Python template for Notebooks / Delta Live Tables / Workflows", + Reader: &builtinReader{name: string(DefaultPython)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultSql, + description: "The default SQL template for .sql files that 
run with Databricks SQL", + Reader: &builtinReader{name: string(DefaultSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DbtSql, + description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + Reader: &builtinReader{name: string(DbtSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: MlopsStacks, + description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", + aliases: []string{"mlops-stack"}, + Reader: &gitReader{gitUrl: "https://github.com/databricks/mlops-stacks", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultPydabs, + hidden: true, + description: "The default PyDABs template", + Reader: &gitReader{gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: ExperimentalJobsAsCode, + hidden: true, + description: "Jobs as code template (experimental)", + Reader: &builtinReader{name: string(ExperimentalJobsAsCode)}, + Writer: &writerWithFullTelemetry{}, + }, +} + +func HelpDescriptions() string { + var lines []string + for _, template := range databricksTemplates { + if template.name != Custom && !template.hidden { + lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) + } + } + return strings.Join(lines, "\n") +} + +var customTemplateDescription = "Bring your own template" + +func options() []cmdio.Tuple { + names := make([]cmdio.Tuple, 0, len(databricksTemplates)) + for _, template := range databricksTemplates { + if template.hidden { + continue + } + tuple := cmdio.Tuple{ + Name: string(template.name), + Id: template.description, + } + names = append(names, tuple) + } + + names = append(names, cmdio.Tuple{ + Name: "custom...", + Id: customTemplateDescription, + }) + return names +} + +func SelectTemplate(ctx context.Context) (TemplateName, error) { + if !cmdio.IsPromptSupported(ctx) { + return "", errors.New("prompting is not supported. 
Please specify the path, name or URL of the template to use") + } + description, err := cmdio.SelectOrdered(ctx, options(), "Template to use") + if err != nil { + return "", err + } + + if description == customTemplateDescription { + return TemplateName(""), ErrCustomSelected + } + + for _, template := range databricksTemplates { + if template.description == description { + return template.name, nil + } + } + + return "", fmt.Errorf("template with description %s not found", description) +} + +func GetDatabricksTemplate(name TemplateName) *Template { + for _, template := range databricksTemplates { + if template.name == name || slices.Contains(template.aliases, string(name)) { + return &template + } + } + + return nil +} diff --git a/cmd/bundle/init_test.go b/libs/template/template_test.go similarity index 59% rename from cmd/bundle/init_test.go rename to libs/template/template_test.go index 475b2e149..80391e58b 100644 --- a/cmd/bundle/init_test.go +++ b/libs/template/template_test.go @@ -1,4 +1,4 @@ -package bundle +package template import ( "testing" @@ -7,12 +7,23 @@ import ( "github.com/stretchr/testify/assert" ) -func TestBundleInitIsRepoUrl(t *testing.T) { - assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) - assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) +func TestTemplateHelpDescriptions(t *testing.T) { + expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` + assert.Equal(t, expected, HelpDescriptions()) +} - assert.False(t, isRepoUrl("./local")) - assert.False(t, isRepoUrl("foo")) +func TestTemplateOptions(t *testing.T) { + expected := []cmdio.Tuple{ + {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, + {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, + {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, + {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, + {Name: "custom...", Id: "Bring your own template"}, + } + assert.Equal(t, expected, options()) } func TestBundleInitRepoName(t *testing.T) { @@ -27,28 +38,41 @@ func TestBundleInitRepoName(t *testing.T) { assert.Equal(t, "www.github.com", repoName("https://www.github.com")) } -func TestNativeTemplateOptions(t *testing.T) { - expected := []cmdio.Tuple{ - {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, - {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, - {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, - {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, - {Name: "custom...", Id: "Bring your own template"}, +func TestTemplateTelemetryIsCapturedForAllDefaultTemplates(t *testing.T) { + for _, tmpl := range databricksTemplates { + w := tmpl.Writer + + // Assert telemetry is captured for all databricks templates, i.e. templates + // owned by databricks. 
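`GetDatabricksTemplate` above resolves a template either by its canonical name or through its alias list. A small sketch of that lookup shape, with hypothetical registry entries rather than the real `databricksTemplates` slice:

```go
package example

import (
	"fmt"
	"slices"
)

type entry struct {
	name    string
	aliases []string
}

// registry is a hypothetical stand-in for databricksTemplates.
var registry = []entry{
	{name: "mlops-stacks", aliases: []string{"mlops-stack"}},
	{name: "default-python"},
}

// lookup mirrors GetDatabricksTemplate: match the canonical name first,
// then fall back to slices.Contains over the alias list.
func lookup(name string) *entry {
	for i := range registry {
		e := &registry[i]
		if e.name == name || slices.Contains(e.aliases, name) {
			return e
		}
	}
	return nil
}

func main() {
	fmt.Println(lookup("mlops-stack").name) // prints "mlops-stacks"
	fmt.Println(lookup("nope") == nil)      // prints "true"
}
```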
+ assert.IsType(t, &writerWithFullTelemetry{}, w) } - assert.Equal(t, expected, nativeTemplateOptions()) } -func TestNativeTemplateHelpDescriptions(t *testing.T) { - expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows -- default-sql: The default SQL template for .sql files that run with Databricks SQL -- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) -- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` - assert.Equal(t, expected, nativeTemplateHelpDescriptions()) -} +func TestTemplateGetDatabricksTemplate(t *testing.T) { + names := []TemplateName{ + DefaultPython, + DefaultSql, + DbtSql, + MlopsStacks, + DefaultPydabs, + } -func TestGetUrlForNativeTemplate(t *testing.T) { - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stacks")) - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stack")) - assert.Equal(t, "", getUrlForNativeTemplate("default-python")) - assert.Equal(t, "", getUrlForNativeTemplate("invalid")) + for _, name := range names { + tmpl := GetDatabricksTemplate(name) + assert.Equal(t, tmpl.name, name) + } + + notExist := []string{ + "/some/path", + "doesnotexist", + "https://www.someurl.com", + } + + for _, name := range notExist { + tmpl := GetDatabricksTemplate(TemplateName(name)) + assert.Nil(t, tmpl) + } + + // Assert the alias works. + assert.Equal(t, MlopsStacks, GetDatabricksTemplate(TemplateName("mlops-stack")).name) } diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index f459a1cfb..d991c06ff 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -3,6 +3,7 @@ # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. bundle: name: {{.project_name}} + uuid: {{bundle_uuid}} include: - resources/*.yml diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index 5e79c5459..04d22a764 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -2,6 +2,7 @@ # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. 
bundle: name: {{.project_name}} + uuid: {{bundle_uuid}} include: - resources/*.yml diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff0..d3e9beef3 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -29,7 +29,8 @@ "source": [ {{- if (eq .include_python "yes") }} "import sys\n", - "sys.path.append('../src')\n", + "\n", + "sys.path.append(\"../src\")\n", "from {{.project_name}} import main\n", "\n", "main.get_taxis(spark).show(10)" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl index a0852c725..e3b70c605 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl @@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not be executed directly. See README.md for how to deploy, test, and run the {{.project_name}} project. """ + from setuptools import setup, find_packages import sys -sys.path.append('./src') + +sys.path.append("./src") import datetime import {{.project_name}} +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + setup( name="{{.project_name}}", # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) # to ensure that changes to wheel package are picked up when used on all-purpose clusters - version={{.project_name}}.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), + version={{.project_name}}.__version__ + "+" + local_version, url="https://databricks.com", author="{{user_name}}", description="wheel file based on {{.project_name}}/src", - packages=find_packages(where='./src'), - package_dir={'': 'src'}, + packages=find_packages(where="./src"), + package_dir={"": "src"}, entry_points={ "packages": [ - "main={{.project_name}}.main:main" - ] + "main={{.project_name}}.main:main", + ], }, install_requires=[ # Dependencies in case the output wheel file is used as a library dependency. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index 253ed321c..d0286639f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -35,6 +35,7 @@ "# Import DLT and src/{{.project_name}}\n", "import dlt\n", "import sys\n", + "\n", "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", "from pyspark.sql.functions import expr\n", "from {{.project_name}} import main" @@ -63,17 +64,18 @@ {{- if (eq .include_python "yes") }} "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.get_taxis(spark)\n", {{else}} "\n", "@dlt.view\n", "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", {{end -}} "\n", + "\n", "@dlt.table\n", "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index c514c6dc5..5ae344c7e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,21 +1,25 @@ from pyspark.sql import SparkSession, DataFrame + def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") + return spark.read.table("samples.nyctaxi.trips") # Create a new Databricks Connect session. If this fails, # check that you have configured Databricks Connect correctly. # See https://docs.databricks.com/dev-tools/databricks-connect.html. def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + def main(): - get_taxis(get_spark()).show(5) + get_taxis(get_spark()).show(5) -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index 5993c123c..6acdf40e7 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -2,6 +2,7 @@ # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. 
bundle: name: {{.project_name}} + uuid: {{bundle_uuid}} include: - resources/*.yml diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json new file mode 100644 index 000000000..00d59af5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -0,0 +1,28 @@ +{ + "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "jobs_as_code_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "include_notebook": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "order": 2 + }, + "include_python": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) Python package in '{{.project_name}}/src'", + "order": 3 + } + }, + "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." +} diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl new file mode 100644 index 000000000..7d0c88e7d --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl @@ -0,0 +1,7 @@ +{{define "latest_lts_dbr_version" -}} + 15.4.x-scala2.12 +{{- end}} + +{{define "latest_lts_db_connect_version_spec" -}} + >=15.4,<15.5 +{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl new file mode 100644 index 000000000..2f8e8ae3e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -0,0 +1,30 @@ +# Preamble + +This file only template directives; it is skipped for the actual output. 
+ +{{skip "__preamble"}} + +# TODO add DLT support, placeholder for now +{{$notDLT := true }} +{{$notNotebook := not (eq .include_notebook "yes")}} +{{$notPython := not (eq .include_python "yes")}} + +{{if $notPython}} + {{skip "{{.project_name}}/src/{{.project_name}}"}} + {{skip "{{.project_name}}/tests/main_test.py"}} +{{end}} + +{{if $notDLT}} + {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} +{{end}} + +{{if $notNotebook}} + {{skip "{{.project_name}}/src/notebook.ipynb"}} +{{end}} + +{{if (and $notDLT $notNotebook $notPython)}} + {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} +{{else}} + {{skip "{{.project_name}}/resources/.gitkeep"}} +{{end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 000000000..497ce3723 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,60 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + {{- end}} + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.py). 
The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..758ec3f16 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,51 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +{{ if .include_python -}} +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +{{ end -}} +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 000000000..ee9570302 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. 
+ +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl new file mode 100644 index 000000000..cee0d8946 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "{{.project_name}}" +requires-python = ">=3.10" +description = "wheel file based on {{.project_name}}" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +{{ if .include_python -}} +[project.entry-points.packages] +main = "{{.project_name}}.main:main" + +{{ end -}} + +[tool.setuptools] +{{ if .include_python -}} +py-modules = ["resources", "{{.project_name}}"] + +{{ else }} +py-modules = ["resources"] + +{{ end -}} +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. 
+ """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl new file mode 100644 index 000000000..7c7a0d33f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -0,0 +1,108 @@ +{{$include_dlt := "no" -}} +from databricks.bundles.jobs import Job + +""" +The main job for {{.project_name}}. + +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +This job runs {{.project_name}}_pipeline on a schedule. +{{end -}} +""" + + +{{.project_name}}_job = Job.from_dict( + { + "name": "{{.project_name}}_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + {{- if not is_service_principal}} + "email_notifications": { + "on_failure": [ + "{{user_name}}", + ], + }, + {{else}} + {{- end -}} + "tasks": [ + {{- if eq .include_notebook "yes" -}} + {{- "\n " -}} + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + {{- end -}} + {{- if (eq $include_dlt "yes") -}} + {{- "\n " -}} + { + "task_key": "refresh_pipeline", + {{- if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "pipeline_task": { + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} + "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", + }, + }, + {{- end -}} + {{- if (eq .include_python "yes") -}} + {{- "\n " -}} + { + "task_key": "main_task", + {{- if (eq $include_dlt "yes") }} + "depends_on": [ + { + "task_key": "refresh_pipeline", + }, + ], + {{- else if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "{{.project_name}}", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the {{.project_name}} package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. 
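+                    # Other library types (for example PyPI packages declared as
+                    # {"pypi": {"package": "..."}}) can be appended to this list.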
+ { + "whl": "dist/*.whl", + }, + ], + }, + {{- end -}} + {{""}} + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "{{template "latest_lts_dbr_version"}}", + "node_type_id": "{{smallest_node_type}}", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl new file mode 100644 index 000000000..c8579ae65 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -0,0 +1,24 @@ +from databricks.bundles.pipelines import Pipeline + +{{.project_name}}_pipeline = Pipeline.from_dict( + { + "name": "{{.project_name}}_pipeline", + "target": "{{.project_name}}_${bundle.target}", + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + "catalog": "catalog_name", + {{- else}} + "catalog": "{{default_catalog}}", + {{- end}} + "libraries": [ + { + "notebook": { + "path": "src/dlt_pipeline.ipynb", + }, + }, + ], + "configuration": { + "bundle.sourcePath": "${workspace.file_path}/src", + }, + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 000000000..19c9d0ebe --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the {{.project_name}} project. 
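+
+The generated wheel version is suffixed with the LOCAL_VERSION environment variable
+(set to a timestamp by the artifact build command in databricks.yml) so that each
+deployment produces a distinct version.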
+""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl new file mode 100644 index 000000000..629106dbf --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "# Import DLT and src/{{.project_name}}\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from {{.project_name}} import main" + {{else}} + "import dlt\n", + "from pyspark.sql.functions import expr\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + {{end -}} + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + {{else}} + "@dlt.view\n", + "def taxi_raw():\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + {{end -}} + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 000000000..6782a053b --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": 
"" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl new file mode 100644 index 000000000..e69de29bb diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 000000000..6f89fca53 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,8 @@ +from {{.project_name}}.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. 
by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/libs/template/writer.go b/libs/template/writer.go new file mode 100644 index 000000000..e3d5af583 --- /dev/null +++ b/libs/template/writer.go @@ -0,0 +1,171 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" +) + +const ( + libraryDirName = "library" + templateDirName = "template" + schemaFileName = "databricks_template_schema.json" +) + +type Writer interface { + // Configure the writer with: + // 1. The path to the config file (if any) that contains input values for the + // template. + // 2. The output directory where the template will be materialized. + Configure(ctx context.Context, configPath, outputDir string) error + + // Materialize the template to the local file system. + Materialize(ctx context.Context, r Reader) error +} + +type defaultWriter struct { + configPath string + outputFiler filer.Filer + + // Internal state + config *config + renderer *renderer +} + +func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { + outputDir, err := filepath.Abs(outputDir) + if err != nil { + return nil, err + } + + // If the CLI is running on DBR and we're writing to the workspace file system, + // use the extension-aware workspace filesystem filer to instantiate the template. + // + // It is not possible to write notebooks through the workspace filesystem's FUSE mount. + // Therefore this is the only way we can initialize templates that contain notebooks + // when running the CLI on DBR and initializing a template to the workspace. + // + if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { + return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) + } + + return filer.NewLocalClient(outputDir) +} + +func (tmpl *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error { + tmpl.configPath = configPath + + outputFiler, err := constructOutputFiler(ctx, outputDir) + if err != nil { + return err + } + + tmpl.outputFiler = outputFiler + return nil +} + +func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) error { + readerFs, err := reader.FS(ctx) + if err != nil { + return err + } + if _, err := fs.Stat(readerFs, schemaFileName); errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) + } + + tmpl.config, err = newConfig(ctx, readerFs, schemaFileName) + if err != nil { + return err + } + + // Read and assign config values from file + if tmpl.configPath != "" { + err = tmpl.config.assignValuesFromFile(tmpl.configPath) + if err != nil { + return err + } + } + + helpers := loadHelpers(ctx) + tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, helpers, readerFs, templateDirName, libraryDirName) + if err != nil { + return err + } + + // Print welcome message + welcome := tmpl.config.schema.WelcomeMessage + if welcome != "" { + welcome, err = tmpl.renderer.executeTemplate(welcome) + if err != nil { + return err + } + cmdio.LogString(ctx, welcome) + } + + // Prompt user for any missing config values. 
Assign default values if + // terminal is not TTY + err = tmpl.config.promptOrAssignDefaultValues(tmpl.renderer) + if err != nil { + return err + } + return tmpl.config.validate() +} + +func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error { + success := tmpl.config.schema.SuccessMessage + if success == "" { + cmdio.LogString(ctx, "✨ Successfully initialized template") + return nil + } + + success, err := tmpl.renderer.executeTemplate(success) + if err != nil { + return err + } + cmdio.LogString(ctx, success) + return nil +} + +func (tmpl *defaultWriter) Materialize(ctx context.Context, reader Reader) error { + err := tmpl.promptForInput(ctx, reader) + if err != nil { + return err + } + + // Walk the template file tree and compute in-memory representations of the + // output files. + err = tmpl.renderer.walk() + if err != nil { + return err + } + + // Flush the output files to disk. + err = tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler) + if err != nil { + return err + } + + return tmpl.printSuccessMessage(ctx) +} + +func (tmpl *defaultWriter) LogTelemetry(ctx context.Context) error { + // TODO, only log the template name and uuid. + return nil +} + +type writerWithFullTelemetry struct { + defaultWriter +} + +func (tmpl *writerWithFullTelemetry) LogTelemetry(ctx context.Context) error { + // TODO, log template name, uuid and enum args as well. + return nil +} diff --git a/libs/template/writer_test.go b/libs/template/writer_test.go new file mode 100644 index 000000000..9d57966ee --- /dev/null +++ b/libs/template/writer_test.go @@ -0,0 +1,58 @@ +package template + +import ( + "context" + "runtime" + "testing" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + workspaceConfig "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDefaultWriterConfigure(t *testing.T) { + // Test on local file system. + w := &defaultWriter{} + err := w.Configure(context.Background(), "/foo/bar", "/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.LocalClient{}, w.outputFiler) +} + +func TestDefaultWriterConfigureOnDBR(t *testing.T) { + // This test is not valid on windows because a DBR image is always based on + // Linux. + if runtime.GOOS == "windows" { + t.Skip("Skipping test on Windows") + } + + ctx := dbr.MockRuntime(context.Background(), true) + ctx = root.SetWorkspaceClient(ctx, &databricks.WorkspaceClient{ + Config: &workspaceConfig.Config{Host: "https://myhost.com"}, + }) + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", "/Workspace/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.WorkspaceFilesExtensionsClient{}, w.outputFiler) +} + +func TestMaterializeForNonTemplateDirectory(t *testing.T) { + tmpDir1 := t.TempDir() + tmpDir2 := t.TempDir() + ctx := context.Background() + + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", tmpDir1) + require.NoError(t, err) + + // Try to materialize a non-template directory. 
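+	// Materialize should fail before rendering anything: promptForInput checks for
+	// databricks_template_schema.json in the template root, and tmpDir2 has no such file.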
+ err = w.Materialize(ctx, &localReader{path: tmpDir2}) + assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at databricks_template_schema.json") +} diff --git a/libs/testdiff/context.go b/libs/testdiff/context.go new file mode 100644 index 000000000..7b6f5ff88 --- /dev/null +++ b/libs/testdiff/context.go @@ -0,0 +1,34 @@ +package testdiff + +import ( + "context" +) + +type key int + +const ( + replacementsMapKey = key(1) +) + +func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return ctx, existingMap + } + } + + newMap := &ReplacementsContext{} + ctx = context.WithValue(ctx, replacementsMapKey, newMap) + return ctx, newMap +} + +func GetReplacementsMap(ctx context.Context) *ReplacementsContext { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return existingMap + } + } + return nil +} diff --git a/libs/testdiff/context_test.go b/libs/testdiff/context_test.go new file mode 100644 index 000000000..5a0191009 --- /dev/null +++ b/libs/testdiff/context_test.go @@ -0,0 +1,30 @@ +package testdiff + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetReplacementsMap_Nil(t *testing.T) { + ctx := context.Background() + repls := GetReplacementsMap(ctx) + assert.Nil(t, repls) +} + +func TestGetReplacementsMap_NotNil(t *testing.T) { + ctx := context.Background() + ctx, _ = WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.NotNil(t, repls) +} + +func TestWithReplacementsMap_UseExisting(t *testing.T) { + ctx := context.Background() + ctx, r1 := WithReplacementsMap(ctx) + ctx, r2 := WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.Equal(t, r1, repls) + assert.Equal(t, r2, repls) +} diff --git a/libs/testdiff/golden.go b/libs/testdiff/golden.go new file mode 100644 index 000000000..c1c51b6c5 --- /dev/null +++ b/libs/testdiff/golden.go @@ -0,0 +1,81 @@ +package testdiff + +import ( + "context" + "flag" + "os" + "strings" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/stretchr/testify/assert" +) + +var OverwriteMode = false + +func init() { + flag.BoolVar(&OverwriteMode, "update", false, "Overwrite golden files") +} + +func ReadFile(t testutil.TestingT, ctx context.Context, filename string) string { + t.Helper() + data, err := os.ReadFile(filename) + if os.IsNotExist(err) { + return "" + } + assert.NoError(t, err, "Failed to read %s", filename) + // On CI, on Windows \n in the file somehow end up as \r\n + return NormalizeNewlines(string(data)) +} + +func WriteFile(t testutil.TestingT, filename, data string) { + t.Helper() + t.Logf("Overwriting %s", filename) + err := os.WriteFile(filename, []byte(data), 0o644) + assert.NoError(t, err, "Failed to write %s", filename) +} + +func AssertOutput(t testutil.TestingT, ctx context.Context, out, outTitle, expectedPath string) { + t.Helper() + expected := ReadFile(t, ctx, expectedPath) + + out = ReplaceOutput(t, ctx, out) + + if out != expected { + AssertEqualTexts(t, expectedPath, outTitle, expected, out) + + if OverwriteMode { + WriteFile(t, expectedPath, out) + } + } +} + +func AssertOutputJQ(t testutil.TestingT, ctx context.Context, out, outTitle, expectedPath string, ignorePaths []string) { + t.Helper() + expected := ReadFile(t, ctx, expectedPath) + + out = ReplaceOutput(t, ctx, out) + + 
if out != expected { + AssertEqualJQ(t.(*testing.T), expectedPath, outTitle, expected, out, ignorePaths) + + if OverwriteMode { + WriteFile(t, expectedPath, out) + } + } +} + +func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string { + t.Helper() + out = NormalizeNewlines(out) + replacements := GetReplacementsMap(ctx) + if replacements == nil { + t.Fatal("WithReplacementsMap was not called") + } + return replacements.Replace(out) +} + +func NormalizeNewlines(input string) string { + output := strings.ReplaceAll(input, "\r\n", "\n") + return strings.ReplaceAll(output, "\r", "\n") +} diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go new file mode 100644 index 000000000..865192662 --- /dev/null +++ b/libs/testdiff/replacement.go @@ -0,0 +1,209 @@ +package testdiff + +import ( + "encoding/json" + "fmt" + "path/filepath" + "regexp" + "runtime" + "slices" + "strings" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/iam" +) + +const ( + testerName = "$USERNAME" +) + +var ( + uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) + numIdRegex = regexp.MustCompile(`[0-9]{3,}`) + privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) +) + +type Replacement struct { + Old *regexp.Regexp + New string +} + +type ReplacementsContext struct { + Repls []Replacement +} + +func (r *ReplacementsContext) Clone() ReplacementsContext { + return ReplacementsContext{Repls: slices.Clone(r.Repls)} +} + +func (r *ReplacementsContext) Replace(s string) string { + // QQQ Should probably only replace whole words + for _, repl := range r.Repls { + s = repl.Old.ReplaceAllString(s, repl.New) + } + return s +} + +func (r *ReplacementsContext) append(pattern *regexp.Regexp, replacement string) { + r.Repls = append(r.Repls, Replacement{ + Old: pattern, + New: replacement, + }) +} + +func (r *ReplacementsContext) appendLiteral(old, new string) { + r.append( + // Transform the input strings such that they can be used as literal strings in regular expressions. + regexp.MustCompile(regexp.QuoteMeta(old)), + // Transform the replacement string such that `$` is interpreted as a literal dollar sign. + // For more information about how the replacement string is used, see [regexp.Regexp.Expand]. + strings.ReplaceAll(new, `$`, `$$`), + ) +} + +func (r *ReplacementsContext) Set(old, new string) { + if old == "" || new == "" { + return + } + + // Always include both verbatim and json version of replacement. + // This helps when the string in question contains \ or other chars that need to be quoted. + // In that case we cannot rely that json(old) == '"{old}"' and need to add it explicitly. + + encodedNew, err := json.Marshal(new) + if err == nil { + encodedOld, err := json.Marshal(old) + if err == nil { + r.appendLiteral(trimQuotes(string(encodedOld)), trimQuotes(string(encodedNew))) + } + } + + r.appendLiteral(old, new) +} + +func trimQuotes(s string) string { + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + return s +} + +func (r *ReplacementsContext) SetPath(old, new string) { + if old != "" && old != "." 
{ + // Converts C:\Users\DENIS~1.BIL -> C:\Users\denis.bilenko + oldEvalled, err1 := filepath.EvalSymlinks(old) + if err1 == nil && oldEvalled != old { + r.SetPathNoEval(oldEvalled, new) + } + } + + r.SetPathNoEval(old, new) +} + +func (r *ReplacementsContext) SetPathNoEval(old, new string) { + r.Set(old, new) + + if runtime.GOOS != "windows" { + return + } + + // Support both forward and backward slashes + m1 := strings.ReplaceAll(old, "\\", "/") + if m1 != old { + r.Set(m1, new) + } + + m2 := strings.ReplaceAll(old, "/", "\\") + if m2 != old && m2 != m1 { + r.Set(m2, new) + } +} + +func (r *ReplacementsContext) SetPathWithParents(old, new string) { + r.SetPath(old, new) + r.SetPath(filepath.Dir(old), new+"_PARENT") + r.SetPath(filepath.Dir(filepath.Dir(old)), new+"_GPARENT") +} + +func PrepareReplacementsWorkspaceClient(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { + t.Helper() + // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) + host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") + r.Set("https://"+host, "$DATABRICKS_URL") + r.Set("http://"+host, "$DATABRICKS_URL") + r.Set(host, "$DATABRICKS_HOST") + r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") + r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") + r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") + r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") + r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") + r.Set(w.Config.Token, "$DATABRICKS_TOKEN") + r.Set(w.Config.Username, "$DATABRICKS_USERNAME") + r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") + r.SetPath(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") + r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") + r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") + r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") + r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") + r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") + // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") + r.Set(w.Config.AzureClientID, testerName) + r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") + r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") + r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") + r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") + r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") + r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") + r.SetPath(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") + // This is set to words like "path" that happen too frequently + // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") +} + +func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { + t.Helper() + // There could be exact matches or overlap between different name fields, so sort them by length + // to ensure we match the largest one first and map them all to the same token + + r.Set(u.UserName, testerName) + r.Set(u.DisplayName, testerName) + if u.Name != nil { + r.Set(u.Name.FamilyName, testerName) + r.Set(u.Name.GivenName, testerName) + } + + for _, val := range u.Emails { + r.Set(val.Value, testerName) + } + + r.Set(iamutil.GetShortUserName(&u), testerName) + + for ind, val := range u.Groups { + r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) + } + + r.Set(u.Id, "$USER.Id") + + for ind, val := range u.Roles { + r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) + } +} + +func 
PrepareReplacementsUUID(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(uuidRegex, "") +} + +func PrepareReplacementsNumber(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(numIdRegex, "") +} + +func PrepareReplacementsTemporaryDirectory(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(privatePathRegex, "/tmp/.../$3") +} diff --git a/libs/testdiff/replacement_test.go b/libs/testdiff/replacement_test.go new file mode 100644 index 000000000..de247c03e --- /dev/null +++ b/libs/testdiff/replacement_test.go @@ -0,0 +1,46 @@ +package testdiff + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReplacement_Literal(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foobar`, `[replacement]`) + assert.Equal(t, `[replacement]`, repls.Replace(`foobar`)) +} + +func TestReplacement_Encoded(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foo"bar`, `[replacement]`) + assert.Equal(t, `"[replacement]"`, repls.Replace(`"foo\"bar"`)) +} + +func TestReplacement_UUID(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsUUID(t, &repls) + + assert.Equal(t, "", repls.Replace("123e4567-e89b-12d3-a456-426614174000")) +} + +func TestReplacement_Number(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsNumber(t, &repls) + + assert.Equal(t, "12", repls.Replace("12")) + assert.Equal(t, "", repls.Replace("123")) +} + +func TestReplacement_TemporaryDirectory(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsTemporaryDirectory(t, &repls) + + assert.Equal(t, "/tmp/.../tail", repls.Replace("/tmp/foo/bar/qux/tail")) +} diff --git a/libs/testdiff/testdiff.go b/libs/testdiff/testdiff.go index 1e1df727a..f65adf7f7 100644 --- a/libs/testdiff/testdiff.go +++ b/libs/testdiff/testdiff.go @@ -17,18 +17,24 @@ func UnifiedDiff(filename1, filename2, s1, s2 string) string { return fmt.Sprint(gotextdiff.ToUnified(filename1, filename2, s1, edits)) } -func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) { +func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) bool { + t.Helper() if len(out) < 1000 && len(expected) < 1000 { // This shows full strings + diff which could be useful when debugging newlines - assert.Equal(t, expected, out) + return assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) } else { // only show diff for large texts diff := UnifiedDiff(filename1, filename2, expected, out) - t.Errorf("Diff:\n" + diff) + if diff != "" { + t.Error("Diff:\n" + diff) + return false + } } + return true } func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) { + t.Helper() patch, err := jsondiff.CompareJSON([]byte(expected), []byte(out)) if err != nil { t.Logf("CompareJSON error for %s vs %s: %s (fallback to textual comparison)", outName, expectedName, err) diff --git a/libs/vfs/filer_test.go b/libs/vfs/filer_test.go index ee1397521..6987c288e 100644 --- a/libs/vfs/filer_test.go +++ b/libs/vfs/filer_test.go @@ -2,7 +2,6 @@ package vfs import ( "context" - "errors" "io/fs" "os" "path/filepath" @@ -42,7 +41,7 @@ func TestFilerPath(t *testing.T) { // Open non-existent file. _, err = p.Open("doesntexist_test.go") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // Stat self. s, err = p.Stat("filer_test.go") @@ -52,7 +51,7 @@ func TestFilerPath(t *testing.T) { // Stat non-existent file. 
_, err = p.Stat("doesntexist_test.go") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // ReadDir self. entries, err := p.ReadDir(".") @@ -61,7 +60,7 @@ func TestFilerPath(t *testing.T) { // ReadDir non-existent directory. _, err = p.ReadDir("doesntexist") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // ReadFile self. buf, err = p.ReadFile("filer_test.go") @@ -70,7 +69,7 @@ func TestFilerPath(t *testing.T) { // ReadFile non-existent file. _, err = p.ReadFile("doesntexist_test.go") - assert.True(t, errors.Is(err, fs.ErrNotExist)) + assert.ErrorIs(t, err, fs.ErrNotExist) // Parent self. pp := p.Parent()
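
Taken together, the new `testdiff` helpers are meant to be composed in golden-file tests: a test attaches a `ReplacementsContext` to its context, registers replacement rules so unstable values (UUIDs, long numeric IDs, temporary paths) are normalized, and then compares output against a checked-in file, optionally rewriting it via the `-update` flag. The sketch below illustrates that flow; the test name, the sample output string, and the `testdata/output.txt` golden path are illustrative assumptions, not files added by this change.

```go
package testdiff_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/libs/testdiff"
)

// Illustrative sketch of how the helpers compose; the golden file path and the
// sample output below are assumptions, not part of this change.
func TestGoldenOutputSketch(t *testing.T) {
	// Attach a ReplacementsContext to the context so ReplaceOutput can find it.
	ctx, repls := testdiff.WithReplacementsMap(context.Background())

	// Normalize unstable values before comparison: UUIDs and long numeric IDs
	// are rewritten to fixed placeholders.
	testdiff.PrepareReplacementsUUID(t, repls)
	testdiff.PrepareReplacementsNumber(t, repls)

	// Output produced by the code under test (hypothetical).
	out := "run 123456 created 123e4567-e89b-12d3-a456-426614174000\n"

	// Compare against the golden file; running the tests with the -update flag
	// rewrites the file instead of failing the assertion.
	testdiff.AssertOutput(t, ctx, out, "command output", "testdata/output.txt")
}
```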