diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 8622b29ca..dfe78790a 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -a6a317df8327c9b1e5cb59a03a42ffa2aabeef6d \ No newline at end of file +779817ed8d63031f5ea761fbd25ee84f38feec0d \ No newline at end of file diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index ee2c7b0fd..0c9fa089a 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -140,9 +140,9 @@ func new{{.PascalName}}() *cobra.Command { {{- end}} {{$method := .}} {{ if not .IsJsonOnly }} - {{range $request.Fields -}} + {{range .AllFields -}} {{- if not .Required -}} - {{if .Entity.IsObject }}// TODO: complex arg: {{.Name}} + {{if .Entity.IsObject}}{{if not (eq . $method.RequestBodyField) }}// TODO: complex arg: {{.Name}}{{end}} {{else if .Entity.IsAny }}// TODO: any: {{.Name}} {{else if .Entity.ArrayValue }}// TODO: array: {{.Name}} {{else if .Entity.MapValue }}// TODO: map via StringToStringVar: {{.Name}} diff --git a/.github/workflows/close-stale-issues.yml b/.github/workflows/close-stale-issues.yml index 7bf754319..ea9558caf 100644 --- a/.github/workflows/close-stale-issues.yml +++ b/.github/workflows/close-stale-issues.yml @@ -18,7 +18,7 @@ jobs: pull-requests: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 with: stale-issue-message: This issue has not received a response in a while. If you want to keep this issue open, please leave a comment below and auto-close will be canceled. stale-pr-message: This PR has not received an update in a while. If you want to keep this PR open, please leave a comment below or push a new commit and auto-close will be canceled. diff --git a/.github/workflows/external-message.yml b/.github/workflows/external-message.yml index f06d81a47..108ca9162 100644 --- a/.github/workflows/external-message.yml +++ b/.github/workflows/external-message.yml @@ -25,7 +25,7 @@ jobs: if: "${{ github.event.pull_request.head.repo.fork }}" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Delete old comments env: diff --git a/.github/workflows/integration-main.yml b/.github/workflows/integration-main.yml index 0b6032d50..84dd7263a 100644 --- a/.github/workflows/integration-main.yml +++ b/.github/workflows/integration-main.yml @@ -20,7 +20,7 @@ jobs: steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/integration-pr.yml b/.github/workflows/integration-pr.yml index 0f9c4797a..7a62113cd 100644 --- a/.github/workflows/integration-pr.yml +++ b/.github/workflows/integration-pr.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Generate GitHub App Token id: generate-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@c1a285145b9d317df6ced56c09f525b5c2b6f755 # v1.11.1 with: app-id: ${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }} private-key: ${{ secrets.DECO_WORKFLOW_TRIGGER_PRIVATE_KEY }} diff --git a/.github/workflows/publish-winget.yml b/.github/workflows/publish-winget.yml index 267077102..cbd24856b 100644 --- a/.github/workflows/publish-winget.yml +++ b/.github/workflows/publish-winget.yml @@ -10,19 +10,65 @@ on: jobs: publish-to-winget-pkgs: 
runs-on: - group: databricks-protected-runner-group - labels: windows-server-latest + group: databricks-deco-testing-runner-group + labels: ubuntu-latest-deco environment: release steps: - - uses: vedantmgoyal2009/winget-releaser@93fd8b606a1672ec3e5c6c3bb19426be68d1a8b0 # https://github.com/vedantmgoyal2009/winget-releaser/releases/tag/v2 - with: - identifier: Databricks.DatabricksCLI - installers-regex: 'windows_.*-signed\.zip$' # Only signed Windows releases - token: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }} - fork-user: eng-dev-ecosystem-bot + - name: Checkout repository and submodules + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - # Use the tag from the input, or the ref name if the input is not provided. - # The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command. - release-tag: ${{ inputs.tag || github.ref_name }} + # When updating the version of komac, make sure to update the checksum in the next step. + # Find both at https://github.com/russellbanks/Komac/releases. + - name: Download komac binary + run: | + curl -s -L -o $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz https://github.com/russellbanks/Komac/releases/download/v2.9.0/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz + + - name: Verify komac binary + run: | + echo "d07a12831ad5418fee715488542a98ce3c0e591d05c850dd149fe78432be8c4c $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz" | sha256sum -c - + + - name: Untar komac binary to temporary path + run: | + mkdir -p $RUNNER_TEMP/komac + tar -xzf $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz -C $RUNNER_TEMP/komac + + - name: Add komac to PATH + run: echo "$RUNNER_TEMP/komac" >> $GITHUB_PATH + + - name: Confirm komac version + run: komac --version + + # Use the tag from the input, or the ref name if the input is not provided. + # The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command. 
+ - name: Strip "v" prefix from version + id: strip_version + run: echo "version=$(echo ${{ inputs.tag || github.ref_name }} | sed 's/^v//')" >> "$GITHUB_OUTPUT" + + - name: Get URLs of signed Windows binaries + id: get_windows_urls + run: | + urls=$( + gh api https://api.github.com/repos/databricks/cli/releases/tags/${{ inputs.tag || github.ref_name }} | \ + jq -r .assets[].browser_download_url | \ + grep -E '_windows_.*-signed\.zip$' | \ + tr '\n' ' ' + ) + if [ -z "$urls" ]; then + echo "No signed Windows binaries found" >&2 + exit 1 + fi + echo "urls=$urls" >> "$GITHUB_OUTPUT" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Publish to Winget + run: | + komac update Databricks.DatabricksCLI \ + --version ${{ steps.strip_version.outputs.version }} \ + --submit \ + --urls ${{ steps.get_windows_urls.outputs.urls }} \ + env: + KOMAC_FORK_OWNER: eng-dev-ecosystem-bot + GITHUB_TOKEN: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }} diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index ddb2fb002..2a8a68862 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -45,20 +45,20 @@ jobs: steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: '3.9' - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0 - name: Set go env run: | @@ -71,16 +71,16 @@ jobs: make vendor pip3 install wheel - - name: Run tests - run: make test + - name: Run tests with coverage + run: make cover - golangci: + linters: needs: cleanups name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 # Use different schema from regular job, to avoid overwriting the same key @@ -95,10 +95,15 @@ jobs: # Exit with status code 1 if there are differences (i.e. 
unformatted files) git diff --exit-code - name: golangci-lint - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: version: v1.63.4 args: --timeout=15m + - name: Run ruff + uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1 + with: + version: "0.9.1" + args: "format --check" validate-bundle-schema: needs: cleanups @@ -106,10 +111,10 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 # Use different schema from regular job, to avoid overwriting the same key diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index 5c56a294e..548d93e90 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -26,13 +26,13 @@ jobs: steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 fetch-tags: true - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 @@ -48,27 +48,27 @@ jobs: - name: Run GoReleaser id: releaser - uses: goreleaser/goreleaser-action@v6 + uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0 with: version: ~> v2 args: release --snapshot --skip docker - name: Upload macOS binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_darwin_snapshot path: | dist/*_darwin_*/ - name: Upload Linux binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_linux_snapshot path: | dist/*_linux_*/ - name: Upload Windows binaries - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 with: name: cli_windows_snapshot path: | @@ -88,7 +88,7 @@ jobs: # Snapshot release may only be updated for commits to the main branch. if: github.ref == 'refs/heads/main' - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # v1 with: name: Snapshot prerelease: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 061688506..5d5811b19 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -18,13 +18,13 @@ jobs: steps: - name: Checkout repository and submodules - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 fetch-tags: true - name: Setup Go - uses: actions/setup-go@v5 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.4 @@ -37,7 +37,7 @@ jobs: # Log into the GitHub Container Registry. The goreleaser action will create # the docker images and push them to the GitHub Container Registry. - - uses: "docker/login-action@v3" + - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 with: registry: "ghcr.io" username: "${{ github.actor }}" @@ -46,11 +46,11 @@ jobs: # QEMU is required to build cross platform docker images using buildx. 
# It allows virtualization of the CPU architecture at the application level. - name: Set up QEMU dependency - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@53851d14592bedcffcf25ea515637cff71ef929a # v3.3.0 - name: Run GoReleaser id: releaser - uses: goreleaser/goreleaser-action@v6 + uses: goreleaser/goreleaser-action@9ed2f89a662bf1735a48bc8557fd212fa902bebf # v6.1.0 with: version: ~> v2 args: release @@ -71,7 +71,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update setup-cli - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | @@ -99,7 +99,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update homebrew-tap - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | @@ -140,7 +140,7 @@ jobs: echo "VERSION=${VERSION:1}" >> $GITHUB_ENV - name: Update CLI version in the VSCode extension - uses: actions/github-script@v7 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DECO_GITHUB_TOKEN }} script: | diff --git a/.gitignore b/.gitignore index edd1409ae..2060b6bac 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ dist/ *.log coverage.txt +coverage-acceptance.txt __pycache__ *.pyc @@ -31,3 +32,4 @@ __pycache__ .vscode/tasks.json .databricks +.ruff_cache diff --git a/.golangci.yaml b/.golangci.yaml index 07a6afdc5..8a83135ee 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -15,12 +15,20 @@ linters: - intrange - mirror - perfsprint + - unconvert linters-settings: govet: enable-all: true disable: - fieldalignment - shadow + settings: + printf: + funcs: + - (github.com/databricks/cli/internal/testutil.TestingT).Infof + - (github.com/databricks/cli/internal/testutil.TestingT).Errorf + - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf + - (github.com/databricks/cli/internal/testutil.TestingT).Skipf gofmt: rewrite-rules: - pattern: 'a[b:len(a)]' @@ -41,6 +49,8 @@ linters-settings: disable: # good check, but we have too many assert.(No)?Errorf? so excluding for now - require-error + copyloopvar: + check-alias: true issues: exclude-dirs-use-default: false # recommended by docs https://golangci-lint.run/usage/false-positives/ max-issues-per-linter: 1000 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b59fa540..255bfb0a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,69 @@ # Version changelog +## [Release] Release v0.239.1 + +CLI: + * Added text output templates for apps list and list-deployments ([#2175](https://github.com/databricks/cli/pull/2175)). + * Fix duplicate "apps" entry in help output ([#2191](https://github.com/databricks/cli/pull/2191)). + +Bundles: + * Allow yaml-anchors in schema ([#2200](https://github.com/databricks/cli/pull/2200)). + * Show an error when non-yaml files used in include section ([#2201](https://github.com/databricks/cli/pull/2201)). + * Set WorktreeRoot to sync root outside git repo ([#2197](https://github.com/databricks/cli/pull/2197)). + * fix: Detailed message for using source-linked deployment with file_path specified ([#2119](https://github.com/databricks/cli/pull/2119)). + * Allow using variables in enum fields ([#2199](https://github.com/databricks/cli/pull/2199)). + * Add experimental-jobs-as-code template ([#2177](https://github.com/databricks/cli/pull/2177)). 
+ * Reading variables from file ([#2171](https://github.com/databricks/cli/pull/2171)). + * Fixed an apps message order and added output test ([#2174](https://github.com/databricks/cli/pull/2174)). + * Default to forward slash-separated paths for path translation ([#2145](https://github.com/databricks/cli/pull/2145)). + * Include a materialized copy of built-in templates ([#2146](https://github.com/databricks/cli/pull/2146)). + + + +## [Release] Release v0.239.0 + +### New feature announcement + +#### Databricks Apps support + +You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration. +For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app + +#### Referencing complex variables in complex variables + +You can now reference complex variables within other complex variables. +For more details see https://github.com/databricks/cli/pull/2157 + +CLI: + * Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)). + * Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)). + +Bundles: + * Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)). + * Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)). + * Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)). + * Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)). + * Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)). + * Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)). + * Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)). + * Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)). + * Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)). + * Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)). + +API Changes: + * Changed `databricks account federation-policy update` command with new required argument order. + * Changed `databricks account service-principal-federation-policy update` command with new required argument order. + +OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08) +Dependency updates: + * Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)). + * Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)). + * Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)). + * Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)). + * Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)). + * Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)). + * Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)). 
+ ## [Release] Release v0.238.0 Bundles: diff --git a/Makefile b/Makefile index 2c84d88ba..00dadcb0c 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,10 @@ -default: build +default: vendor fmt lint PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... . GOTESTSUM_FORMAT ?= pkgname-and-test-fails +GOTESTSUM_CMD ?= gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped + lint: golangci-lint run --fix @@ -14,17 +16,26 @@ lintcheck: # formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'. # If you need to ensure that formatting & imports are always fixed, do "make fmt lint" fmt: + ruff format -q golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./... test: - gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- ${PACKAGES} + ${GOTESTSUM_CMD} -- ${PACKAGES} cover: - gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- -coverprofile=coverage.txt ${PACKAGES} + rm -fr ./acceptance/build/cover/ + CLI_GOCOVERDIR=build/cover ${GOTESTSUM_CMD} -- -coverprofile=coverage.txt ${PACKAGES} + rm -fr ./acceptance/build/cover-merged/ + mkdir -p acceptance/build/cover-merged/ + go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/ + go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt showcover: go tool cover -html=coverage.txt +acc-showcover: + go tool cover -html=coverage-acceptance.txt + build: vendor go build -mod vendor @@ -33,7 +44,7 @@ snapshot: vendor: go mod vendor - + schema: go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json @@ -45,4 +56,4 @@ integration: integration-short: $(INTEGRATION) -short -.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short +.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover diff --git a/NOTICE b/NOTICE index f6b59e0b0..ed22084cf 100644 --- a/NOTICE +++ b/NOTICE @@ -105,3 +105,7 @@ License - https://github.com/wI2L/jsondiff/blob/master/LICENSE https://github.com/hexops/gotextdiff Copyright (c) 2009 The Go Authors. All rights reserved. License - https://github.com/hexops/gotextdiff/blob/main/LICENSE + +https://github.com/BurntSushi/toml +Copyright (c) 2013 TOML authors +https://github.com/BurntSushi/toml/blob/master/COPYING diff --git a/acceptance/README.md b/acceptance/README.md index 42a37d253..75ac1d5fc 100644 --- a/acceptance/README.md +++ b/acceptance/README.md @@ -17,3 +17,5 @@ For more complex tests one can also use: - `errcode` helper: if the command fails with non-zero code, it appends `Exit code: N` to the output but returns success to caller (bash), allowing continuation of script. - `trace` helper: prints the arguments before executing the command. - custom output files: redirect output to custom file (it must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`. + +See [selftest](./selftest) for a toy test. 
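+
+For example, a minimal test directory might contain just a one-line `script`:
+
+    trace $CLI bundle validate
+
+together with an `output.txt` file holding the expected output of that command; the test runner executes `script` in a temporary copy of the test directory and compares what it prints against `output.txt`.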
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index b9fb219dc..e48bd9908 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -1,7 +1,9 @@ package acceptance_test import ( + "context" "errors" + "flag" "fmt" "io" "os" @@ -17,10 +19,26 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" ) -var KeepTmp = os.Getenv("KEEP_TMP") != "" +var KeepTmp bool + +// In order to debug CLI running under acceptance test, set this to full subtest name, e.g. "bundle/variables/empty" +// Then install your breakpoints and click "debug test" near TestAccept in VSCODE. +// example: var SingleTest = "bundle/variables/empty" +var SingleTest = "" + +// If enabled, instead of compiling and running CLI externally, we'll start in-process server that accepts and runs +// CLI commands. The $CLI in test scripts is a helper that just forwards command-line arguments to this server (see bin/callserver.py). +// Also disables parallelism in tests. +var InprocessMode bool + +func init() { + flag.BoolVar(&InprocessMode, "inprocess", SingleTest != "", "Run CLI in the same process as test (for debugging)") + flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run") +} const ( EntryPointScript = "script" @@ -35,37 +53,100 @@ var Scripts = map[string]bool{ } func TestAccept(t *testing.T) { + testAccept(t, InprocessMode, SingleTest) +} + +func TestInprocessMode(t *testing.T) { + if InprocessMode { + t.Skip("Already tested by TestAccept") + } + require.Equal(t, 1, testAccept(t, true, "selftest")) +} + +func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { + repls := testdiff.ReplacementsContext{} cwd, err := os.Getwd() require.NoError(t, err) - execPath := BuildCLI(t, cwd) - // $CLI is what test scripts are using + coverDir := os.Getenv("CLI_GOCOVERDIR") + + if coverDir != "" { + require.NoError(t, os.MkdirAll(coverDir, os.ModePerm)) + coverDir, err = filepath.Abs(coverDir) + require.NoError(t, err) + t.Logf("Writing coverage to %s", coverDir) + } + + execPath := "" + + if InprocessMode { + cmdServer := StartCmdServer(t) + t.Setenv("CMD_SERVER_URL", cmdServer.URL) + execPath = filepath.Join(cwd, "bin", "callserver.py") + } else { + execPath = BuildCLI(t, cwd, coverDir) + } + t.Setenv("CLI", execPath) + repls.SetPath(execPath, "$CLI") // Make helper scripts available t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH"))) - server := StartServer(t) - AddHandlers(server) - // Redirect API access to local server: - t.Setenv("DATABRICKS_HOST", fmt.Sprintf("http://127.0.0.1:%d", server.Port)) - t.Setenv("DATABRICKS_TOKEN", "dapi1234") + tempHomeDir := t.TempDir() + repls.SetPath(tempHomeDir, "$TMPHOME") + t.Logf("$TMPHOME=%v", tempHomeDir) - homeDir := t.TempDir() - // Do not read user's ~/.databrickscfg - t.Setenv(env.HomeEnvVar(), homeDir) + // Prevent CLI from downloading terraform in each test: + t.Setenv("DATABRICKS_TF_EXEC_PATH", tempHomeDir) - repls := testdiff.ReplacementsContext{} - repls.Set(execPath, "$CLI") + ctx := context.Background() + cloudEnv := os.Getenv("CLOUD_ENV") + + if cloudEnv == "" { + server := StartServer(t) + AddHandlers(server) + // Redirect API access to local server: + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "dapi1234") + + homeDir := t.TempDir() + // Do not 
read user's ~/.databrickscfg + t.Setenv(env.HomeEnvVar(), homeDir) + } + + workspaceClient, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + user, err := workspaceClient.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, &repls, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient) + testdiff.PrepareReplacementsUUID(t, &repls) testDirs := getTests(t) require.NotEmpty(t, testDirs) + + if singleTest != "" { + testDirs = slices.DeleteFunc(testDirs, func(n string) bool { + return n != singleTest + }) + require.NotEmpty(t, testDirs, "singleTest=%#v did not match any tests\n%#v", singleTest, testDirs) + } + for _, dir := range testDirs { - t.Run(dir, func(t *testing.T) { - t.Parallel() - runTest(t, dir, repls) + testName := strings.ReplaceAll(dir, "\\", "/") + t.Run(testName, func(t *testing.T) { + if !InprocessMode { + t.Parallel() + } + + runTest(t, dir, coverDir, repls.Clone()) }) } + + return len(testDirs) } func getTests(t *testing.T) []string { @@ -88,7 +169,14 @@ func getTests(t *testing.T) []string { return testDirs } -func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { +func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) { + config, configPath := LoadConfig(t, dir) + + isEnabled, isPresent := config.GOOS[runtime.GOOS] + if isPresent && !isEnabled { + t.Skipf("Disabled via GOOS.%s setting in %s", runtime.GOOS, configPath) + } + var tmpDir string var err error if KeepTmp { @@ -101,6 +189,9 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { tmpDir = t.TempDir() } + repls.SetPathWithParents(tmpDir, "$TMPDIR") + repls.Repls = append(repls.Repls, config.Repls...) + scriptContents := readMergedScriptContents(t, dir) testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents) @@ -111,70 +202,105 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { args := []string{"bash", "-euo", "pipefail", EntryPointScript} cmd := exec.Command(args[0], args[1:]...) + if coverDir != "" { + // Creating individual coverage directory for each test, because writing to the same one + // results in sporadic failures like this one (only if tests are running in parallel): + // +error: coverage meta-data emit failed: writing ... rename .../tmp.covmeta.b3f... 
.../covmeta.b3f2c...: no such file or directory + coverDir = filepath.Join(coverDir, strings.ReplaceAll(dir, string(os.PathSeparator), "--")) + err := os.MkdirAll(coverDir, os.ModePerm) + require.NoError(t, err) + cmd.Env = append(os.Environ(), "GOCOVERDIR="+coverDir) + } + + // Write combined output to a file + out, err := os.Create(filepath.Join(tmpDir, "output.txt")) + require.NoError(t, err) + cmd.Stdout = out + cmd.Stderr = out cmd.Dir = tmpDir - outB, err := cmd.CombinedOutput() + err = cmd.Run() - out := formatOutput(string(outB), err) - out = repls.Replace(out) - doComparison(t, filepath.Join(dir, "output.txt"), "script output", out) + // Include exit code in output (if non-zero) + formatOutput(out, err) + require.NoError(t, out.Close()) - for key := range outputs { - if key == "output.txt" { - // handled above - continue - } - pathNew := filepath.Join(tmpDir, key) - newValBytes, err := os.ReadFile(pathNew) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir) - } else { - t.Errorf("%s: could not read: %s", key, err) - } - continue - } - pathExpected := filepath.Join(dir, key) - newVal := repls.Replace(string(newValBytes)) - doComparison(t, pathExpected, pathNew, newVal) + printedRepls := false + + // Compare expected outputs + for relPath := range outputs { + doComparison(t, repls, dir, tmpDir, relPath, &printedRepls) } // Make sure there are not unaccounted for new files - files, err := os.ReadDir(tmpDir) - require.NoError(t, err) - - for _, f := range files { - name := f.Name() - if _, ok := inputs[name]; ok { + files := ListDir(t, tmpDir) + for _, relPath := range files { + if _, ok := inputs[relPath]; ok { continue } - if _, ok := outputs[name]; ok { + if _, ok := outputs[relPath]; ok { continue } - t.Errorf("Unexpected output: %s", f) - if strings.HasPrefix(name, "out") { + if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" // Show the contents & support overwrite mode for it: - pathNew := filepath.Join(tmpDir, name) - newVal := testutil.ReadFile(t, pathNew) - newVal = repls.Replace(newVal) - doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal) + doComparison(t, repls, dir, tmpDir, relPath, &printedRepls) } } } -func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { - valueNew = testdiff.NormalizeNewlines(valueNew) - valueExpected := string(readIfExists(t, pathExpected)) - valueExpected = testdiff.NormalizeNewlines(valueExpected) - testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew) - if testdiff.OverwriteMode { - if valueNew != "" { - t.Logf("Overwriting: %s", pathExpected) - testutil.WriteFile(t, pathExpected, valueNew) - } else { - t.Logf("Removing: %s", pathExpected) - _ = os.Remove(pathExpected) +func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string, printedRepls *bool) { + pathRef := filepath.Join(dirRef, relPath) + pathNew := filepath.Join(dirNew, relPath) + bufRef, okRef := readIfExists(t, pathRef) + bufNew, okNew := readIfExists(t, pathNew) + if !okRef && !okNew { + t.Errorf("Both files are missing: %s, %s", pathRef, pathNew) + return + } + + valueRef := testdiff.NormalizeNewlines(string(bufRef)) + valueNew := testdiff.NormalizeNewlines(string(bufNew)) + + // Apply replacements to the new value only. + // The reference value is stored after applying replacements. 
+ valueNew = repls.Replace(valueNew) + + // The test did not produce an expected output file. + if okRef && !okNew { + t.Errorf("Missing output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Removing output file: %s", relPath) + require.NoError(t, os.Remove(pathRef)) } + return + } + + // The test produced an unexpected output file. + if !okRef && okNew { + t.Errorf("Unexpected output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Writing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + return + } + + // Compare the reference and new values. + equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if !equal && testdiff.OverwriteMode { + t.Logf("Overwriting existing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + + if !equal && printedRepls != nil && !*printedRepls { + *printedRepls = true + var items []string + for _, item := range repls.Repls { + items = append(items, fmt.Sprintf("REPL %s => %s", item.Old, item.New)) + } + t.Log("Available replacements:\n" + strings.Join(items, "\n")) } } @@ -182,17 +308,22 @@ func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { // Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir. func readMergedScriptContents(t *testing.T, dir string) string { scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript)) + + // Wrap script contents in a subshell such that changing the working + // directory only affects the main script and not cleanup. + scriptContents = "(\n" + scriptContents + ")\n" + prepares := []string{} cleanups := []string{} for { - x := readIfExists(t, filepath.Join(dir, CleanupScript)) - if len(x) > 0 { + x, ok := readIfExists(t, filepath.Join(dir, CleanupScript)) + if ok { cleanups = append(cleanups, string(x)) } - x = readIfExists(t, filepath.Join(dir, PrepareScript)) - if len(x) > 0 { + x, ok = readIfExists(t, filepath.Join(dir, PrepareScript)) + if ok { prepares = append(prepares, string(x)) } @@ -210,14 +341,30 @@ func readMergedScriptContents(t *testing.T, dir string) string { return strings.Join(prepares, "\n") } -func BuildCLI(t *testing.T, cwd string) string { +func BuildCLI(t *testing.T, cwd, coverDir string) string { execPath := filepath.Join(cwd, "build", "databricks") if runtime.GOOS == "windows" { execPath += ".exe" } start := time.Now() - args := []string{"go", "build", "-mod", "vendor", "-o", execPath} + args := []string{ + "go", "build", + "-mod", "vendor", + "-o", execPath, + } + + if coverDir != "" { + args = append(args, "-cover") + } + + if runtime.GOOS == "windows" { + // Get this error on my local Windows: + // error obtaining VCS status: exit status 128 + // Use -buildvcs=false to disable VCS stamping. + args = append(args, "-buildvcs=false") + } + cmd := exec.Command(args[0], args[1:]...) cmd.Dir = ".." 
out, err := cmd.CombinedOutput() @@ -252,29 +399,28 @@ func copyFile(src, dst string) error { return err } -func formatOutput(out string, err error) string { +func formatOutput(w io.Writer, err error) { if err == nil { - return out + return } if exiterr, ok := err.(*exec.ExitError); ok { exitCode := exiterr.ExitCode() - out += fmt.Sprintf("\nExit code: %d\n", exitCode) + fmt.Fprintf(w, "\nExit code: %d\n", exitCode) } else { - out += fmt.Sprintf("\nError: %s\n", err) + fmt.Fprintf(w, "\nError: %s\n", err) } - return out } -func readIfExists(t *testing.T, path string) []byte { +func readIfExists(t *testing.T, path string) ([]byte, bool) { data, err := os.ReadFile(path) if err == nil { - return data + return data, true } if !errors.Is(err, os.ErrNotExist) { t.Fatalf("%s: %s", path, err) } - return []byte{} + return []byte{}, false } func CopyDir(src, dst string, inputs, outputs map[string]bool) error { @@ -289,8 +435,10 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return err } - if strings.HasPrefix(name, "out") { - outputs[relPath] = true + if strings.HasPrefix(relPath, "out") { + if !info.IsDir() { + outputs[relPath] = true + } return nil } else { inputs[relPath] = true @@ -309,3 +457,32 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return copyFile(path, destPath) }) } + +func ListDir(t *testing.T, src string) []string { + var files []string + err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + // Do not FailNow here. + // The output comparison is happening after this call which includes output.txt which + // includes errors printed by commands which include explanation why a given file cannot be read. + t.Errorf("Error when listing %s: path=%s: %s", src, path, err) + return nil + } + + if info.IsDir() { + return nil + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + if err != nil { + t.Errorf("Failed to list %s: %s", src, err) + } + return files +} diff --git a/acceptance/bin/callserver.py b/acceptance/bin/callserver.py new file mode 100755 index 000000000..294ef8fdb --- /dev/null +++ b/acceptance/bin/callserver.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import sys +import os +import json +import urllib.request +from urllib.parse import urlencode + +env = {} +for key, value in os.environ.items(): + if len(value) > 10_000: + sys.stderr.write(f"Dropping key={key} value len={len(value)}\n") + continue + env[key] = value + +q = { + "args": " ".join(sys.argv[1:]), + "cwd": os.getcwd(), + "env": json.dumps(env), +} + +url = os.environ["CMD_SERVER_URL"] + "/?" + urlencode(q) +if len(url) > 100_000: + sys.exit("url too large") + +resp = urllib.request.urlopen(url) +assert resp.status == 200, (resp.status, resp.url, resp.headers) +result = json.load(resp) +sys.stderr.write(result["stderr"]) +sys.stdout.write(result["stdout"]) +exitcode = int(result["exitcode"]) +sys.exit(exitcode) diff --git a/acceptance/bin/sort_blocks.py b/acceptance/bin/sort_blocks.py index f50c6f50f..d558f252a 100755 --- a/acceptance/bin/sort_blocks.py +++ b/acceptance/bin/sort_blocks.py @@ -4,6 +4,7 @@ Helper to sort blocks in text file. A block is a set of lines separated from oth This is to workaround non-determinism in the output. 
""" + import sys blocks = [] @@ -11,10 +12,10 @@ blocks = [] for line in sys.stdin: if not line.strip(): if blocks and blocks[-1]: - blocks.append('') + blocks.append("") continue if not blocks: - blocks.append('') + blocks.append("") blocks[-1] += line blocks.sort() diff --git a/acceptance/bundle/git-permerror/databricks.yml b/acceptance/bundle/git-permerror/databricks.yml new file mode 100644 index 000000000..83e0acda8 --- /dev/null +++ b/acceptance/bundle/git-permerror/databricks.yml @@ -0,0 +1,2 @@ +bundle: + name: git-permerror diff --git a/acceptance/bundle/git-permerror/output.txt b/acceptance/bundle/git-permerror/output.txt new file mode 100644 index 000000000..2b52134ab --- /dev/null +++ b/acceptance/bundle/git-permerror/output.txt @@ -0,0 +1,78 @@ +=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case. + +>>> chmod 000 .git + +>>> $CLI bundle validate +Error: unable to load repository specific gitconfig: open config: permission denied + +Name: git-permerror +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git-permerror/default + +Found 1 error + +Exit code: 1 + +>>> $CLI bundle validate -o json +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": ".", + "inferred": true +} + +>>> withdir subdir/a/b $CLI bundle validate -o json +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": ".", + "inferred": true +} + + +=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. bundle_root_path is not correct in subdir case. + +>>> chmod 000 .git/HEAD + +>>> $CLI bundle validate -o json +{ + "bundle_root_path": ".", + "inferred": true +} + +>>> withdir subdir/a/b $CLI bundle validate -o json +{ + "bundle_root_path": ".", + "inferred": true +} + + +=== No permissions to read .git/config. Badness: inferred is incorretly set to true. bundle_root_path is not correct is subdir case. + +>>> chmod 000 .git/config + +>>> $CLI bundle validate -o json +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": ".", + "inferred": true +} + +>>> withdir subdir/a/b $CLI bundle validate -o json +Error: unable to load repository specific gitconfig: open config: permission denied + + +Exit code: 1 +{ + "bundle_root_path": ".", + "inferred": true +} diff --git a/acceptance/bundle/git-permerror/script b/acceptance/bundle/git-permerror/script new file mode 100644 index 000000000..782cbf5bc --- /dev/null +++ b/acceptance/bundle/git-permerror/script @@ -0,0 +1,25 @@ +mkdir myrepo +cd myrepo +cp ../databricks.yml . +git-repo-init +mkdir -p subdir/a/b + +printf "=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case.\n" +trace chmod 000 .git +errcode trace $CLI bundle validate +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +printf "\n\n=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. 
bundle_root_path is not correct in subdir case.\n" +chmod 700 .git +trace chmod 000 .git/HEAD +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +printf "\n\n=== No permissions to read .git/config. Badness: inferred is incorrectly set to true. bundle_root_path is not correct in subdir case.\n" +chmod 666 .git/HEAD +trace chmod 000 .git/config +errcode trace $CLI bundle validate -o json | jq .bundle.git +errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git + +rm -fr .git diff --git a/acceptance/bundle/git-permerror/test.toml b/acceptance/bundle/git-permerror/test.toml new file mode 100644 index 000000000..3f96e551c --- /dev/null +++ b/acceptance/bundle/git-permerror/test.toml @@ -0,0 +1,5 @@ +Badness = "Warning logs not shown; inferred flag is incorrectly set to true; bundle_root_path is not correct" + +[GOOS] +# This test relies on chmod which does not work on Windows +windows = false diff --git a/acceptance/bundle/help/bundle-deploy/output.txt b/acceptance/bundle/help/bundle-deploy/output.txt new file mode 100644 index 000000000..13c903f3e --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle deploy --help +Deploy bundle + +Usage: + databricks bundle deploy [flags] + +Flags: + --auto-approve Skip interactive approvals that might be required for deployment. + -c, --cluster-id string Override cluster in the deployment with the given cluster ID. + --fail-on-active-runs Fail if there are running jobs or pipelines in the deployment. + --force Force-override Git branch validation. + --force-lock Force acquisition of deployment lock. + -h, --help help for deploy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-deploy/script b/acceptance/bundle/help/bundle-deploy/script new file mode 100644 index 000000000..6375cfea3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/script @@ -0,0 +1 @@ +trace $CLI bundle deploy --help diff --git a/acceptance/bundle/help/bundle-deployment/output.txt b/acceptance/bundle/help/bundle-deployment/output.txt new file mode 100644 index 000000000..ddf5b3305 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/output.txt @@ -0,0 +1,22 @@ + +>>> $CLI bundle deployment --help +Deployment related commands + +Usage: + databricks bundle deployment [command] + +Available Commands: + bind Bind bundle-defined resources to existing resources + unbind Unbind bundle-defined resources from its managed remote resource + +Flags: + -h, --help help for deployment + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle deployment [command] --help" for more information about a command. 
diff --git a/acceptance/bundle/help/bundle-deployment/script b/acceptance/bundle/help/bundle-deployment/script new file mode 100644 index 000000000..ef93f7dc2 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/script @@ -0,0 +1 @@ +trace $CLI bundle deployment --help diff --git a/acceptance/bundle/help/bundle-destroy/output.txt b/acceptance/bundle/help/bundle-destroy/output.txt new file mode 100644 index 000000000..d70164301 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle destroy --help +Destroy deployed bundle resources + +Usage: + databricks bundle destroy [flags] + +Flags: + --auto-approve Skip interactive approvals for deleting resources and files + --force-lock Force acquisition of deployment lock. + -h, --help help for destroy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-destroy/script b/acceptance/bundle/help/bundle-destroy/script new file mode 100644 index 000000000..955d7b7f9 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/script @@ -0,0 +1 @@ +trace $CLI bundle destroy --help diff --git a/acceptance/bundle/help/bundle-generate-dashboard/output.txt b/acceptance/bundle/help/bundle-generate-dashboard/output.txt new file mode 100644 index 000000000..a63ce0ff8 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/output.txt @@ -0,0 +1,24 @@ + +>>> $CLI bundle generate dashboard --help +Generate configuration for a dashboard + +Usage: + databricks bundle generate dashboard [flags] + +Flags: + -s, --dashboard-dir string directory to write the dashboard representation to (default "src") + --existing-id string ID of the dashboard to generate configuration for + --existing-path string workspace path of the dashboard to generate configuration for + -f, --force force overwrite existing files in the output directory + -h, --help help for dashboard + --resource string resource key of dashboard to watch for changes + -d, --resource-dir string directory to write the configuration to (default "resources") + --watch watch for changes to the dashboard and update the configuration + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-dashboard/script b/acceptance/bundle/help/bundle-generate-dashboard/script new file mode 100644 index 000000000..320156129 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/script @@ -0,0 +1 @@ +trace $CLI bundle generate dashboard --help diff --git a/acceptance/bundle/help/bundle-generate-job/output.txt b/acceptance/bundle/help/bundle-generate-job/output.txt new file mode 100644 index 000000000..adc3f45ae --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate job --help +Generate bundle configuration for a job + +Usage: + databricks bundle generate job [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-job-id int Job ID of the job to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for job + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-job/script b/acceptance/bundle/help/bundle-generate-job/script new file mode 100644 index 000000000..109ed59aa --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/script @@ -0,0 +1 @@ +trace $CLI bundle generate job --help diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt new file mode 100644 index 000000000..cf5f70920 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate pipeline --help +Generate bundle configuration for a pipeline + +Usage: + databricks bundle generate pipeline [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-pipeline-id string ID of the pipeline to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for pipeline + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-pipeline/script b/acceptance/bundle/help/bundle-generate-pipeline/script new file mode 100644 index 000000000..c6af62d0a --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/script @@ -0,0 +1 @@ +trace $CLI bundle generate pipeline --help diff --git a/acceptance/bundle/help/bundle-generate/output.txt b/acceptance/bundle/help/bundle-generate/output.txt new file mode 100644 index 000000000..1d77dfdbd --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/output.txt @@ -0,0 +1,25 @@ + +>>> $CLI bundle generate --help +Generate bundle configuration + +Usage: + databricks bundle generate [command] + +Available Commands: + app Generate bundle configuration for a Databricks app + dashboard Generate configuration for a dashboard + job Generate bundle configuration for a job + pipeline Generate bundle configuration for a pipeline + +Flags: + -h, --help help for generate + --key string resource key to use for the generated configuration + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle generate [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-generate/script b/acceptance/bundle/help/bundle-generate/script new file mode 100644 index 000000000..932588768 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/script @@ -0,0 +1 @@ +trace $CLI bundle generate --help diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt new file mode 100644 index 000000000..bafe5a187 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -0,0 +1,31 @@ + +>>> $CLI bundle init --help +Initialize using a bundle template. + +TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: +- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) +- a local file system path with a template directory +- a Git repository URL, e.g. https://github.com/my/repository + +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates. + +Usage: + databricks bundle init [TEMPLATE_PATH] [flags] + +Flags: + --branch string Git branch to use for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. + -h, --help help for init + --output-dir string Directory to write the initialized template to. + --tag string Git tag to use for template initialization + --template-dir string Directory path within a Git repository containing the template. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-init/script b/acceptance/bundle/help/bundle-init/script new file mode 100644 index 000000000..9bcce7559 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/script @@ -0,0 +1 @@ +trace $CLI bundle init --help diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt new file mode 100644 index 000000000..8b98aa850 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle open --help +Open a resource in the browser + +Usage: + databricks bundle open [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for open + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-open/script b/acceptance/bundle/help/bundle-open/script new file mode 100644 index 000000000..b4dfa2222 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/script @@ -0,0 +1 @@ +trace $CLI bundle open --help diff --git a/acceptance/bundle/help/bundle-run/output.txt b/acceptance/bundle/help/bundle-run/output.txt new file mode 100644 index 000000000..17763a295 --- /dev/null +++ b/acceptance/bundle/help/bundle-run/output.txt @@ -0,0 +1,57 @@ + +>>> $CLI bundle run --help +Run the job or pipeline identified by KEY. + +The KEY is the unique identifier of the resource to run. In addition to +customizing the run using any of the available flags, you can also specify +keyword or positional arguments as shown in these examples: + + databricks bundle run my_job -- --key1 value1 --key2 value2 + +Or: + + databricks bundle run my_job -- value1 value2 value3 + +If the specified job uses job parameters or the job has a notebook task with +parameters, the first example applies and flag names are mapped to the +parameter names. + +If the specified job does not use job parameters and the job has a Python file +task or a Python wheel task, the second example applies. + +Usage: + databricks bundle run [flags] KEY + +Job Flags: + --params stringToString comma separated k=v pairs for job parameters (default []) + +Job Task Flags: + Note: please prefer use of job-level parameters (--param) over task-level parameters. + For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task + --dbt-commands strings A list of commands to execute for jobs with DBT tasks. + --jar-params strings A list of parameters for jobs with Spark JAR tasks. + --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) + --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) + --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) + --python-params strings A list of parameters for jobs with Python tasks. + --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. + --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) + +Pipeline Flags: + --full-refresh strings List of tables to reset and recompute. + --full-refresh-all Perform a full graph reset and recompute. 
+ --refresh strings List of tables to update. + --refresh-all Perform a full graph update. + --validate-only Perform an update to validate graph correctness. + +Flags: + -h, --help help for run + --no-wait Don't wait for the run to complete. + --restart Restart the run if it is already running. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-run/script b/acceptance/bundle/help/bundle-run/script new file mode 100644 index 000000000..edcf1786a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/script @@ -0,0 +1 @@ +trace $CLI bundle run --help diff --git a/acceptance/bundle/help/bundle-schema/output.txt b/acceptance/bundle/help/bundle-schema/output.txt new file mode 100644 index 000000000..8f2983f5b --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle schema --help +Generate JSON Schema for bundle configuration + +Usage: + databricks bundle schema [flags] + +Flags: + -h, --help help for schema + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-schema/script b/acceptance/bundle/help/bundle-schema/script new file mode 100644 index 000000000..a084fec8e --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/script @@ -0,0 +1 @@ +trace $CLI bundle schema --help diff --git a/acceptance/bundle/help/bundle-summary/output.txt b/acceptance/bundle/help/bundle-summary/output.txt new file mode 100644 index 000000000..935c4bdc5 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle summary --help +Summarize resources deployed by this bundle + +Usage: + databricks bundle summary [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for summary + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-summary/script b/acceptance/bundle/help/bundle-summary/script new file mode 100644 index 000000000..967279d86 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/script @@ -0,0 +1 @@ +trace $CLI bundle summary --help diff --git a/acceptance/bundle/help/bundle-sync/output.txt b/acceptance/bundle/help/bundle-sync/output.txt new file mode 100644 index 000000000..6588e6978 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/output.txt @@ -0,0 +1,19 @@ + +>>> $CLI bundle sync --help +Synchronize bundle tree to the workspace + +Usage: + databricks bundle sync [flags] + +Flags: + --full perform full synchronization (default is incremental) + -h, --help help for sync + --interval duration file system polling interval (for --watch) (default 1s) + --output type type of the output format + --watch watch local file system for changes + +Global Flags: + --debug enable debug logging + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-sync/script b/acceptance/bundle/help/bundle-sync/script new file mode 100644 index 000000000..fe1d6c7e3 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/script @@ -0,0 +1 @@ +trace $CLI bundle sync --help diff --git a/acceptance/bundle/help/bundle-validate/output.txt b/acceptance/bundle/help/bundle-validate/output.txt new file mode 100644 index 000000000..a0c350faf --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate --help +Validate configuration + +Usage: + databricks bundle validate [flags] + +Flags: + -h, --help help for validate + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-validate/script b/acceptance/bundle/help/bundle-validate/script new file mode 100644 index 000000000..8b8434b2d --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/script @@ -0,0 +1 @@ +trace $CLI bundle validate --help diff --git a/acceptance/bundle/help/bundle/output.txt b/acceptance/bundle/help/bundle/output.txt new file mode 100644 index 000000000..e0e2ea47c --- /dev/null +++ b/acceptance/bundle/help/bundle/output.txt @@ -0,0 +1,33 @@ + +>>> $CLI bundle --help +Databricks Asset Bundles let you express data/AI/analytics projects as code. + +Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html + +Usage: + databricks bundle [command] + +Available Commands: + deploy Deploy bundle + deployment Deployment related commands + destroy Destroy deployed bundle resources + generate Generate bundle configuration + init Initialize using a bundle template + open Open a resource in the browser + run Run a job or pipeline update + schema Generate JSON Schema for bundle configuration + summary Summarize resources deployed by this bundle + sync Synchronize bundle tree to the workspace + validate Validate configuration + +Flags: + -h, --help help for bundle + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks bundle [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle/script b/acceptance/bundle/help/bundle/script new file mode 100644 index 000000000..eac116817 --- /dev/null +++ b/acceptance/bundle/help/bundle/script @@ -0,0 +1 @@ +trace $CLI bundle --help diff --git a/acceptance/bundle/includes/non_yaml_in_include/databricks.yml b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml new file mode 100644 index 000000000..162bd6013 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: non_yaml_in_includes + +include: + - test.py + - resources/*.yml diff --git a/acceptance/bundle/includes/non_yaml_in_include/output.txt b/acceptance/bundle/includes/non_yaml_in_include/output.txt new file mode 100644 index 000000000..6006ca14e --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/output.txt @@ -0,0 +1,10 @@ +Error: Files in the 'include' configuration section must be YAML files. + in databricks.yml:5:4 + +The file test.py in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead. + +Name: non_yaml_in_includes + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/includes/non_yaml_in_include/script b/acceptance/bundle/includes/non_yaml_in_include/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/includes/non_yaml_in_include/test.py b/acceptance/bundle/includes/non_yaml_in_include/test.py new file mode 100644 index 000000000..44159b395 --- /dev/null +++ b/acceptance/bundle/includes/non_yaml_in_include/test.py @@ -0,0 +1 @@ +print("Hello world") diff --git a/acceptance/bundle/override/job_cluster/output.txt b/acceptance/bundle/override/job_cluster/output.txt index 947d19032..ff6e8316e 100644 --- a/acceptance/bundle/override/job_cluster/output.txt +++ b/acceptance/bundle/override/job_cluster/output.txt @@ -4,7 +4,7 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -32,7 +32,7 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/override/job_cluster_var/databricks.yml b/acceptance/bundle/override/job_cluster_var/databricks.yml index 546cc2d8a..48e68c926 100644 --- a/acceptance/bundle/override/job_cluster_var/databricks.yml +++ b/acceptance/bundle/override/job_cluster_var/databricks.yml @@ -20,7 +20,6 @@ targets: jobs: foo: job_clusters: - # This does not work because merging is done before resolution - job_cluster_key: "${var.mykey}" new_cluster: 
node_type_id: i3.xlarge diff --git a/acceptance/bundle/override/job_cluster_var/output.txt b/acceptance/bundle/override/job_cluster_var/output.txt index dee2a3b5b..0b19e5eb2 100644 --- a/acceptance/bundle/override/job_cluster_var/output.txt +++ b/acceptance/bundle/override/job_cluster_var/output.txt @@ -4,22 +4,17 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", "job_clusters": [ - { - "job_cluster_key": "key", - "new_cluster": { - "spark_version": "13.3.x-scala2.12" - } - }, { "job_cluster_key": "key", "new_cluster": { "node_type_id": "i3.xlarge", - "num_workers": 1 + "num_workers": 1, + "spark_version": "13.3.x-scala2.12" } } ], @@ -36,8 +31,8 @@ Name: override_job_cluster Target: development Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/development Validation OK! @@ -46,22 +41,17 @@ Validation OK! "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", "job_clusters": [ - { - "job_cluster_key": "key", - "new_cluster": { - "spark_version": "13.3.x-scala2.12" - } - }, { "job_cluster_key": "key", "new_cluster": { "node_type_id": "i3.2xlarge", - "num_workers": 4 + "num_workers": 4, + "spark_version": "13.3.x-scala2.12" } } ], @@ -78,7 +68,7 @@ Validation OK! Name: override_job_cluster Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging Validation OK! 
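Note: the expected-output change above reflects that job cluster definitions sharing the same `job_cluster_key` are now merged into a single entry (the target override contributes `node_type_id`/`num_workers`, while `spark_version` comes from the base definition). A minimal sketch of how that merged result can be inspected, assuming the same `validate -o json` plus `jq` pattern the acceptance scripts use elsewhere in this diff (the exact invocation here is illustrative, not this test's script):

```
# Illustrative only: render the resolved configuration as JSON and extract the
# merged job cluster list for the "foo" job in the development target.
databricks bundle validate -t development -o json |
  jq '.resources.jobs.foo.job_clusters'
```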
diff --git a/acceptance/bundle/override/job_tasks/output.txt b/acceptance/bundle/override/job_tasks/output.txt index 0bb0b1812..915351d4e 100644 --- a/acceptance/bundle/override/job_tasks/output.txt +++ b/acceptance/bundle/override/job_tasks/output.txt @@ -69,8 +69,8 @@ Error: file ./test1.py not found Name: override_job_tasks Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_tasks/staging + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_tasks/staging Found 1 error diff --git a/acceptance/bundle/override/merge-string-map/output.txt b/acceptance/bundle/override/merge-string-map/output.txt index 986da8174..b566aa07f 100644 --- a/acceptance/bundle/override/merge-string-map/output.txt +++ b/acceptance/bundle/override/merge-string-map/output.txt @@ -21,7 +21,7 @@ Warning: expected map, found string Name: merge-string-map Target: dev Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/merge-string-map/dev + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/merge-string-map/dev Found 1 warning diff --git a/acceptance/bundle/override/pipeline_cluster/output.txt b/acceptance/bundle/override/pipeline_cluster/output.txt index 81bf58180..8babed0ec 100644 --- a/acceptance/bundle/override/pipeline_cluster/output.txt +++ b/acceptance/bundle/override/pipeline_cluster/output.txt @@ -14,7 +14,7 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/development/state/metadata.json" }, "name": "job", "permissions": [] @@ -36,7 +36,7 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/staging/state/metadata.json" }, "name": "job", "permissions": [] diff --git a/bundle/tests/path_translation/fallback/README.md b/acceptance/bundle/paths/fallback/README.md similarity index 100% rename from bundle/tests/path_translation/fallback/README.md rename to acceptance/bundle/paths/fallback/README.md diff --git a/bundle/tests/path_translation/nominal/databricks.yml b/acceptance/bundle/paths/fallback/databricks.yml similarity index 80% rename from bundle/tests/path_translation/nominal/databricks.yml rename to acceptance/bundle/paths/fallback/databricks.yml index cd425920d..c6d0abe0a 100644 --- a/bundle/tests/path_translation/nominal/databricks.yml +++ b/acceptance/bundle/paths/fallback/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_nominal + name: fallback include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/fallback/output.job.json b/acceptance/bundle/paths/fallback/output.job.json new file mode 100644 index 000000000..fe9e1cf3d --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.job.json @@ -0,0 +1,67 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt 
run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + } +] diff --git a/acceptance/bundle/paths/fallback/output.pipeline.json b/acceptance/bundle/paths/fallback/output.pipeline.json new file mode 100644 index 000000000..38521cb22 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt new file mode 100644 index 000000000..63121f3d7 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate -t development -o json + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: fallback +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/fallback/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/fallback/override_job.yml b/acceptance/bundle/paths/fallback/override_job.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_job.yml rename to acceptance/bundle/paths/fallback/override_job.yml diff --git a/bundle/tests/path_translation/fallback/override_pipeline.yml b/acceptance/bundle/paths/fallback/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_pipeline.yml rename to acceptance/bundle/paths/fallback/override_pipeline.yml diff --git a/bundle/tests/path_translation/fallback/resources/my_job.yml b/acceptance/bundle/paths/fallback/resources/my_job.yml similarity index 71% rename from bundle/tests/path_translation/fallback/resources/my_job.yml rename to acceptance/bundle/paths/fallback/resources/my_job.yml index 4907df4f0..921ee412b 100644 --- a/bundle/tests/path_translation/fallback/resources/my_job.yml +++ b/acceptance/bundle/paths/fallback/resources/my_job.yml @@ -4,33 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/fallback/resources/my_pipeline.yml b/acceptance/bundle/paths/fallback/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/resources/my_pipeline.yml rename to acceptance/bundle/paths/fallback/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/fallback/script b/acceptance/bundle/paths/fallback/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/fallback/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/fallback/script.cleanup b/acceptance/bundle/paths/fallback/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/fallback/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep b/acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep 
similarity index 100% rename from bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/fallback/src/file.py b/acceptance/bundle/paths/fallback/src/file.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file.py rename to acceptance/bundle/paths/fallback/src/file.py diff --git a/bundle/tests/path_translation/fallback/src/file1.py b/acceptance/bundle/paths/fallback/src/file1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file1.py rename to acceptance/bundle/paths/fallback/src/file1.py diff --git a/bundle/tests/path_translation/fallback/src/file2.py b/acceptance/bundle/paths/fallback/src/file2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file2.py rename to acceptance/bundle/paths/fallback/src/file2.py diff --git a/bundle/tests/path_translation/fallback/src/notebook.py b/acceptance/bundle/paths/fallback/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook.py rename to acceptance/bundle/paths/fallback/src/notebook.py diff --git a/bundle/tests/path_translation/fallback/src/notebook1.py b/acceptance/bundle/paths/fallback/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook1.py rename to acceptance/bundle/paths/fallback/src/notebook1.py diff --git a/bundle/tests/path_translation/fallback/src/notebook2.py b/acceptance/bundle/paths/fallback/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook2.py rename to acceptance/bundle/paths/fallback/src/notebook2.py diff --git a/bundle/tests/path_translation/fallback/src/sql.sql b/acceptance/bundle/paths/fallback/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/fallback/src/sql.sql rename to acceptance/bundle/paths/fallback/src/sql.sql diff --git a/bundle/tests/path_translation/nominal/README.md b/acceptance/bundle/paths/nominal/README.md similarity index 100% rename from bundle/tests/path_translation/nominal/README.md rename to acceptance/bundle/paths/nominal/README.md diff --git a/bundle/tests/path_translation/fallback/databricks.yml b/acceptance/bundle/paths/nominal/databricks.yml similarity index 79% rename from bundle/tests/path_translation/fallback/databricks.yml rename to acceptance/bundle/paths/nominal/databricks.yml index 92be3f921..5d3c22f91 100644 --- a/bundle/tests/path_translation/fallback/databricks.yml +++ b/acceptance/bundle/paths/nominal/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_fallback + name: nominal include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/nominal/output.job.json b/acceptance/bundle/paths/nominal/output.job.json new file mode 100644 index 000000000..9e1cb4d90 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.job.json @@ -0,0 +1,89 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/dbt_project" + }, + 
"job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + }, + { + "for_each_task": { + "task": { + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + } + } + }, + "job_cluster_key": "default", + "task_key": "for_each_notebook_example" + }, + { + "for_each_task": { + "task": { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + } + } + }, + "task_key": "for_each_spark_python_example" + } +] diff --git a/acceptance/bundle/paths/nominal/output.pipeline.json b/acceptance/bundle/paths/nominal/output.pipeline.json new file mode 100644 index 000000000..277b0c4a1 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt new file mode 100644 index 000000000..1badcdec6 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate -t development -o json + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: nominal +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/nominal/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/nominal/override_job.yml b/acceptance/bundle/paths/nominal/override_job.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_job.yml rename to acceptance/bundle/paths/nominal/override_job.yml diff --git a/bundle/tests/path_translation/nominal/override_pipeline.yml b/acceptance/bundle/paths/nominal/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_pipeline.yml rename to acceptance/bundle/paths/nominal/override_pipeline.yml diff --git a/bundle/tests/path_translation/nominal/resources/my_job.yml b/acceptance/bundle/paths/nominal/resources/my_job.yml similarity index 74% rename from bundle/tests/path_translation/nominal/resources/my_job.yml rename to acceptance/bundle/paths/nominal/resources/my_job.yml index 2020c9dc8..13996a20c 100644 --- a/bundle/tests/path_translation/nominal/resources/my_job.yml +++ b/acceptance/bundle/paths/nominal/resources/my_job.yml @@ -4,38 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar - task_key: for_each_notebook_example + job_cluster_key: default for_each_task: task: notebook_task: @@ -44,5 +51,12 @@ resources: - task_key: for_each_spark_python_example for_each_task: task: + job_cluster_key: default spark_python_task: python_file: "this value is overridden" + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/nominal/resources/my_pipeline.yml b/acceptance/bundle/paths/nominal/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/resources/my_pipeline.yml rename to acceptance/bundle/paths/nominal/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/nominal/script b/acceptance/bundle/paths/nominal/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/nominal/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/nominal/script.cleanup b/acceptance/bundle/paths/nominal/script.cleanup new file mode 100644 index 
000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/nominal/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep b/acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/nominal/src/file.py b/acceptance/bundle/paths/nominal/src/file.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file.py rename to acceptance/bundle/paths/nominal/src/file.py diff --git a/bundle/tests/path_translation/nominal/src/file1.py b/acceptance/bundle/paths/nominal/src/file1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file1.py rename to acceptance/bundle/paths/nominal/src/file1.py diff --git a/bundle/tests/path_translation/nominal/src/file2.py b/acceptance/bundle/paths/nominal/src/file2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file2.py rename to acceptance/bundle/paths/nominal/src/file2.py diff --git a/bundle/tests/path_translation/nominal/src/notebook.py b/acceptance/bundle/paths/nominal/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook.py rename to acceptance/bundle/paths/nominal/src/notebook.py diff --git a/bundle/tests/path_translation/nominal/src/notebook1.py b/acceptance/bundle/paths/nominal/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook1.py rename to acceptance/bundle/paths/nominal/src/notebook1.py diff --git a/bundle/tests/path_translation/nominal/src/notebook2.py b/acceptance/bundle/paths/nominal/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook2.py rename to acceptance/bundle/paths/nominal/src/notebook2.py diff --git a/bundle/tests/path_translation/nominal/src/sql.sql b/acceptance/bundle/paths/nominal/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/nominal/src/sql.sql rename to acceptance/bundle/paths/nominal/src/sql.sql diff --git a/bundle/tests/relative_path_translation/databricks.yml b/acceptance/bundle/paths/relative_path_translation/databricks.yml similarity index 100% rename from bundle/tests/relative_path_translation/databricks.yml rename to acceptance/bundle/paths/relative_path_translation/databricks.yml diff --git a/acceptance/bundle/paths/relative_path_translation/output.default.json b/acceptance/bundle/paths/relative_path_translation/output.default.json new file mode 100644 index 000000000..e2514b392 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.default.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file1.py", + "/Workspace/remote/src/file1.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.override.json b/acceptance/bundle/paths/relative_path_translation/output.override.json new file mode 100644 index 000000000..729d2eaa0 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.override.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file2.py", + "/Workspace/remote/src/file2.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.txt b/acceptance/bundle/paths/relative_path_translation/output.txt new file mode 100644 index 000000000..362f2ec7b --- /dev/null +++ 
b/acceptance/bundle/paths/relative_path_translation/output.txt @@ -0,0 +1,4 @@ + +>>> $CLI bundle validate -t default -o json + +>>> $CLI bundle validate -t override -o json diff --git a/bundle/tests/relative_path_translation/resources/job.yml b/acceptance/bundle/paths/relative_path_translation/resources/job.yml similarity index 66% rename from bundle/tests/relative_path_translation/resources/job.yml rename to acceptance/bundle/paths/relative_path_translation/resources/job.yml index 93f121f25..9540ff1ad 100644 --- a/bundle/tests/relative_path_translation/resources/job.yml +++ b/acceptance/bundle/paths/relative_path_translation/resources/job.yml @@ -3,12 +3,20 @@ resources: job: tasks: - task_key: local + job_cluster_key: default spark_python_task: python_file: ../src/file1.py - task_key: variable_reference + job_cluster_key: default spark_python_task: # Note: this is a pure variable reference yet needs to persist the location # of the reference, not the location of the variable value. # Also see https://github.com/databricks/cli/issues/1330. python_file: ${var.file_path} + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/acceptance/bundle/paths/relative_path_translation/script b/acceptance/bundle/paths/relative_path_translation/script new file mode 100644 index 000000000..252e9a07f --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -t default -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json +trace $CLI bundle validate -t override -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json diff --git a/bundle/tests/relative_path_translation/src/file1.py b/acceptance/bundle/paths/relative_path_translation/src/file1.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file1.py rename to acceptance/bundle/paths/relative_path_translation/src/file1.py diff --git a/bundle/tests/relative_path_translation/src/file2.py b/acceptance/bundle/paths/relative_path_translation/src/file2.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file2.py rename to acceptance/bundle/paths/relative_path_translation/src/file2.py diff --git a/bundle/tests/quality_monitor/databricks.yml b/acceptance/bundle/quality_monitor/databricks.yml similarity index 100% rename from bundle/tests/quality_monitor/databricks.yml rename to acceptance/bundle/quality_monitor/databricks.yml diff --git a/acceptance/bundle/quality_monitor/output.txt b/acceptance/bundle/quality_monitor/output.txt new file mode 100644 index 000000000..b3718c802 --- /dev/null +++ b/acceptance/bundle/quality_monitor/output.txt @@ -0,0 +1,73 @@ + +>>> $CLI bundle validate -o json -t development +{ + "mode": "development", + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.dev", + "schedule": null, + "table_name": "main.test.dev" + } + } +} + +>>> $CLI bundle validate -o json -t staging +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + 
"inference_log": { + "granularities": [ + "1 day" + ], + "model_id_col": "model_id", + "prediction_col": "prediction", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp" + }, + "output_schema_name": "main.staging", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.staging" + } + } +} + +>>> $CLI bundle validate -o json -t production +{ + "mode": null, + "quality_monitors": { + "my_monitor": { + "assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1", + "inference_log": { + "granularities": [ + "1 day", + "1 hour" + ], + "model_id_col": "model_id_prod", + "prediction_col": "prediction_prod", + "problem_type": "PROBLEM_TYPE_REGRESSION", + "timestamp_col": "timestamp_prod" + }, + "output_schema_name": "main.prod", + "schedule": { + "quartz_cron_expression": "0 0 12 * * ?", + "timezone_id": "UTC" + }, + "table_name": "main.test.prod" + } + } +} diff --git a/acceptance/bundle/quality_monitor/script b/acceptance/bundle/quality_monitor/script new file mode 100644 index 000000000..85a69d5e7 --- /dev/null +++ b/acceptance/bundle/quality_monitor/script @@ -0,0 +1,3 @@ +trace $CLI bundle validate -o json -t development | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t staging | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' +trace $CLI bundle validate -o json -t production | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }' diff --git a/acceptance/bundle/syncroot/dotdot-git/databricks.yml b/acceptance/bundle/syncroot/dotdot-git/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-git/output.txt b/acceptance/bundle/syncroot/dotdot-git/output.txt new file mode 100644 index 000000000..f1dc5fb01 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/output.txt @@ -0,0 +1,11 @@ +Error: path "$TMPDIR" is not within repository root "$TMPDIR/myrepo" + +Name: test-bundle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/syncroot/dotdot-git/script b/acceptance/bundle/syncroot/dotdot-git/script new file mode 100644 index 000000000..0706a1d5e --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-git/script @@ -0,0 +1,6 @@ +# This should error, we do not allow syncroot outside of git repo. +mkdir myrepo +cd myrepo +cp ../databricks.yml . +git-repo-init +$CLI bundle validate | sed 's/\\\\/\//g' diff --git a/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml new file mode 100644 index 000000000..7215ffea2 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/databricks.yml @@ -0,0 +1,5 @@ +bundle: + name: test-bundle +sync: + paths: + - .. diff --git a/acceptance/bundle/syncroot/dotdot-nogit/output.txt b/acceptance/bundle/syncroot/dotdot-nogit/output.txt new file mode 100644 index 000000000..46f617f35 --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/output.txt @@ -0,0 +1,7 @@ +Name: test-bundle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default + +Validation OK! 
diff --git a/acceptance/bundle/syncroot/dotdot-nogit/script b/acceptance/bundle/syncroot/dotdot-nogit/script new file mode 100644 index 000000000..d3388903e --- /dev/null +++ b/acceptance/bundle/syncroot/dotdot-nogit/script @@ -0,0 +1,2 @@ +# This should not error, syncroot can be outside bundle root. +$CLI bundle validate diff --git a/acceptance/bundle/templates/dbt-sql/input.json b/acceptance/bundle/templates/dbt-sql/input.json new file mode 100644 index 000000000..201ac9667 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_dbt_sql", + "http_path": "/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/dbt-sql/output.txt b/acceptance/bundle/templates/dbt-sql/output.txt new file mode 100644 index 000000000..972c7e152 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +Welcome to the dbt template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +📊 Your new project has been created in the 'my_dbt_sql' directory! +If you already have dbt installed, just type 'cd my_dbt_sql; dbt init' to get started. +Refer to the README.md file for full "getting started" guide and production setup instructions. + + +>>> $CLI bundle validate -t dev +Name: my_dbt_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_dbt_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod + +Validation OK! 
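The template acceptance tests drive `bundle init` non-interactively by answering the prompts from the `input.json` shown above; the test's script (included further down in this diff) follows this shape:

```
# Initialize the dbt-sql template with pre-filled answers, then validate both targets.
databricks bundle init dbt-sql --config-file ./input.json --output-dir output
cd output/my_dbt_sql
databricks bundle validate -t dev
databricks bundle validate -t prod
```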
diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json new file mode 100644 index 000000000..28fe943fd --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "redhat.vscode-yaml", + "innoverio.vscode-dbt-power-user", + ] +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json new file mode 100644 index 000000000..e8dcd1a83 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json @@ -0,0 +1,32 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md new file mode 100644 index 000000000..756a2eda4 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -0,0 +1,138 @@ +# my_dbt_sql + +The 'my_dbt_sql' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). 
+ +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. + + Run these instructions from the `my_dbt_sql` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To set up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found? You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. + +## Manually deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. + +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project.
+ +For example, the default template would deploy a job called +`[dev yourname] my_dbt_sql_job` to your workspace. +You can find that job by opening your workspace and clicking on **Workflows**. + +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml. +When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml +has a schedule set that runs every day. The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml new file mode 100644 index 000000000..1962bc543 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -0,0 +1,34 @@ +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_dbt_sql + uuid: + +include: + - resources/*.yml + +# Deployment targets. +# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml +targets: + dev: + default: true + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml new file mode 100644 index 000000000..fdaf30dda --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml @@ -0,0 +1,38 @@ + +# This file defines dbt profiles for deployed dbt jobs. +my_dbt_sql: + target: dev # default target + outputs: + + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B.
when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: + type: databricks + method: http + catalog: main + schema: "{{ var('dev_schema') }}" + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'dev' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" + + # The production target when deployed with the Databricks CLI + prod: + type: databricks + method: http + catalog: main + schema: default + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'prod' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml new file mode 100644 index 000000000..4218640d8 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml @@ -0,0 +1,32 @@ +name: 'my_dbt_sql' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'my_dbt_sql' + +# These configurations specify where dbt should look for different types of files. +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. +model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{ config(...) }}` macro. +models: + my_dbt_sql: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml new file mode 100644 index 000000000..5e0f0fc29 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml @@ -0,0 +1,23 @@ +# This file defines prompts with defaults for dbt initialization. +# It is used when the `dbt init` command is invoked. +# +fixed: + type: databricks +prompts: + host: + default: $DATABRICKS_HOST + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + http_path: + hint: 'HTTP path of SQL warehouse to use' + default: /sql/2.0/warehouses/f00dcafe + catalog: + hint: 'initial catalog' + default: main + schema: + hint: 'personal schema where dbt will build objects during development, example: $USERNAME' + threads: + hint: 'threads to use during development, 1 or more' + type: 'int' + default: 4 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt new file mode 100644 index 000000000..e6b861203 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt @@ -0,0 +1,3 @@ +## requirements-dev.txt: dependencies for local development.
+ +dbt-databricks>=1.8.0,<2.0.0 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml new file mode 100644 index 000000000..d52f8ed50 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml @@ -0,0 +1,43 @@ +resources: + jobs: + my_dbt_sql_job: + name: my_dbt_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + + tasks: + - task_key: dbt + + dbt_task: + project_directory: ../ + # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ + commands: + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql new file mode 100644 index 000000000..e32736ceb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql @@ -0,0 +1,17 @@ + +-- This model file defines a materialized view called 'orders_daily' +-- +-- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. 
+{{ config(materialized = 'materialized_view') }} + +select order_date, count(*) AS number_of_orders + +from {{ ref('orders_raw') }} + +-- During development, only process a smaller range of data +{% if target.name != 'prod' %} +where order_date >= '2019-08-01' and order_date < '2019-09-01' +{% endif %} + +group by order_date diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql new file mode 100644 index 000000000..8faf8f38b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql @@ -0,0 +1,16 @@ +-- This model file defines a streaming table called 'orders_raw' +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml new file mode 100644 index 000000000..c64f1bfce --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: orders_raw + description: "Raw ingested orders" + columns: + - name: customer_name + description: "The name of a customer" + data_tests: + - unique + - not_null + + - name: orders_daily + description: "Number of orders by day" + columns: + - name: order_date + description: "The date on which orders took place" + data_tests: + - unique + - not_null diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script new file mode 100644 index 000000000..c4ca817fe --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +cd output/my_dbt_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-python/input.json b/acceptance/bundle/templates/default-python/input.json new file mode 100644 index 000000000..3e1d79c68 --- /dev/null +++ b/acceptance/bundle/templates/default-python/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_python", + 
"include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/default-python/output.txt b/acceptance/bundle/templates/default-python/output.txt new file mode 100644 index 000000000..5493ac2cf --- /dev/null +++ b/acceptance/bundle/templates/default-python/output.txt @@ -0,0 +1,30 @@ + +>>> $CLI bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/README.md new file mode 100644 index 000000000..97d7d7949 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/README.md @@ -0,0 +1,47 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +5. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +7. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml new file mode 100644 index 000000000..9deca9cf5 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml @@ -0,0 +1,31 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. 
+ root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..e6148a4ad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,49 @@ +# The main job for my_default_python. 
+resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main + libraries: + # By default we just include the .whl file generated for the my_default_python package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..f9e083f4f --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,13 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + catalog: main + target: my_default_python_${bundle.target} + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..3b2fef4b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py new file mode 100644 index 000000000..84b24ecb8 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="$USERNAME", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..36e993af7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. 
+def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..0d560443b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/script b/acceptance/bundle/templates/default-python/script new file mode 100644 index 000000000..b11a7ea21 --- /dev/null +++ b/acceptance/bundle/templates/default-python/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-sql/.ruff.toml b/acceptance/bundle/templates/default-sql/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/acceptance/bundle/templates/default-sql/input.json b/acceptance/bundle/templates/default-sql/input.json new file mode 100644 index 000000000..c728d25de --- /dev/null +++ b/acceptance/bundle/templates/default-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_sql", + "http_path": 
"/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, automatically use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/default-sql/output.txt b/acceptance/bundle/templates/default-sql/output.txt new file mode 100644 index 000000000..fe0139093 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init default-sql --config-file ./input.json --output-dir output + +Welcome to the default SQL template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_sql' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_default_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json new file mode 100644 index 000000000..8e1023465 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json new file mode 100644 index 000000000..c641abe39 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "main", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md new file mode 100644 index 000000000..67ded153f --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -0,0 +1,41 @@ +# my_default_sql + +The 'my_default_sql' project was generated by using the default-sql template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html + +2. Authenticate to your Databricks workspace (if you have not done so already): + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_sql_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. To run a job, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + +7. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml new file mode 100644 index 000000000..ab857287e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_default_sql. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_sql + uuid: + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ mode: development + default: true + workspace: + host: $DATABRICKS_URL + variables: + warehouse_id: f00dcafe + catalog: main + schema: ${workspace.current_user.short_name} + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + variables: + warehouse_id: f00dcafe + catalog: main + schema: default + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml new file mode 100644 index 000000000..86de0f9db --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml @@ -0,0 +1,38 @@ +# A job running SQL queries on a SQL warehouse +resources: + jobs: + my_default_sql_sql_job: + name: my_default_sql_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + - name: bundle_target + default: ${bundle.target} + + tasks: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_raw.sql + + - task_key: orders_daily + depends_on: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_daily.sql diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md new file mode 100644 index 000000000..5350d09cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks and SQL files. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb new file mode 100644 index 000000000..c3fd072e5 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "exploration", + "widgets": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql new file mode 100644 index 000000000..ea7b80b54 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -0,0 +1,21 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REPLACE MATERIALIZED VIEW + orders_daily +AS SELECT + order_date, count(*) AS number_of_orders +FROM + orders_raw + +WHERE if( + {{bundle_target}} = "prod", + true, + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01' +) + +GROUP BY order_date diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql new file mode 100644 index 000000000..79b1354cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -0,0 +1,19 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REFRESH STREAMING TABLE + orders_raw +AS SELECT + customer_name, + DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date, + order_number +FROM STREAM READ_FILES( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script new file mode 100644 index 000000000..66e7a14a2 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-sql --config-file ./input.json --output-dir output + +cd output/my_default_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json new file mode 100644 index 000000000..748076c75 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -0,0 +1,5 @@ +{ + "project_name": 
"my_jobs_as_code", + "include_notebook": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt new file mode 100644 index 000000000..1aa8a94d5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -0,0 +1,85 @@ + +>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_jobs_as_code' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev --output json +{ + "jobs": { + "my_jobs_as_code_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "email_notifications": { + "on_failure": [ + "$USERNAME" + ] + }, + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 4, + "min_workers": 1 + }, + "node_type_id": "i3.xlarge", + "spark_version": "15.4.x-scala2.12" + } + } + ], + "max_concurrent_runs": 4, + "name": "[dev $USERNAME] my_jobs_as_code_job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": { + "dev": "$USERNAME" + }, + "tasks": [ + { + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "depends_on": [ + { + "task_key": "notebook_task" + } + ], + "job_cluster_key": "job_cluster", + "libraries": [ + { + "whl": "dist/*.whl" + } + ], + "python_wheel_task": { + "entry_point": "main", + "package_name": "my_jobs_as_code" + }, + "task_key": "main_task" + } + ], + "trigger": { + "pause_status": "PAUSED", + "periodic": { + "interval": 1, + "unit": "DAYS" + } + } + } + } +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md new file mode 100644 index 000000000..8c429c6e5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -0,0 +1,58 @@ +# my_jobs_as_code + +The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. 
Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_jobs_as_code_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_jobs_as_code_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml new file mode 100644 index 000000000..fd87aa381 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_jobs_as_code. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_jobs_as_code + uuid: + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. 
+ root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml new file mode 100644 index 000000000..28240e3ec --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_jobs_as_code" +requires-python = ">=3.10" +description = "wheel file based on my_jobs_as_code" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +[project.entry-points.packages] +main = "my_jobs_as_code.main:main" + +[tool.setuptools] +py-modules = ["resources", "my_jobs_as_code"] + +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. 
+ # "databricks-connect>=15.4,<15.5", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py new file mode 100644 index 000000000..4854d656f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py @@ -0,0 +1,67 @@ +from databricks.bundles.jobs import Job + +""" +The main job for my_jobs_as_code. +""" + + +my_jobs_as_code_job = Job.from_dict( + { + "name": "my_jobs_as_code_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + "email_notifications": { + "on_failure": [ + "$USERNAME", + ], + }, + "tasks": [ + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + { + "task_key": "main_task", + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "my_jobs_as_code", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the my_jobs_as_code package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "15.4.x-scala2.12", + "node_type_id": "i3.xlarge", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py new file mode 100644 index 000000000..ba284ba82 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_jobs_as_code project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb new file mode 100644 index 000000000..9bc3f1560 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_jobs_as_code import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py new file mode 100644 index 000000000..13e100ee2 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py @@ -0,0 +1,8 @@ +from my_jobs_as_code.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script new file mode 100644 index 000000000..af28b9d0a --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -0,0 +1,14 @@ +trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +cd output/my_jobs_as_code + +# silence uv output because it's non-deterministic +uv sync 2> /dev/null + +# remove version constraint because it always creates a warning on dev builds +cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new +mv databricks.yml.new databricks.yml + +trace $CLI bundle validate -t dev --output json | jq ".resources" + +rm -fr .venv resources/__pycache__ uv.lock my_jobs_as_code.egg-info diff --git a/acceptance/bundle/variables/arg-repeat/databricks.yml b/acceptance/bundle/variables/arg-repeat/databricks.yml new file mode 100644 index 000000000..377c6cfab --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: arg-repeat + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/arg-repeat/output.txt b/acceptance/bundle/variables/arg-repeat/output.txt new file mode 100644 index 000000000..2f9de1a3c --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/output.txt @@ -0,0 +1,18 @@ + +>>> errcode $CLI bundle validate --var a=one -o json +{ + "a": { + "default": "hello", + "value": "one" + } +} + +>>> errcode $CLI bundle validate --var a=one --var a=two +Error: failed to assign two to a: variable has already been assigned value: one + +Name: arg-repeat +Target: default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/arg-repeat/script b/acceptance/bundle/variables/arg-repeat/script new file mode 100644 index 000000000..3e03dbcb1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/script @@ -0,0 +1,2 @@ 
+trace errcode $CLI bundle validate --var a=one -o json | jq .variables +trace errcode $CLI bundle validate --var a=one --var a=two diff --git a/acceptance/bundle/variables/complex-cross-ref/databricks.yml b/acceptance/bundle/variables/complex-cross-ref/databricks.yml new file mode 100644 index 000000000..4459f44df --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: complex-cross-ref + +variables: + a: + default: + a_1: 500 + a_2: ${var.b.b_2} + b: + default: + b_1: ${var.a.a_1} + b_2: 2.5 diff --git a/acceptance/bundle/variables/complex-cross-ref/output.txt b/acceptance/bundle/variables/complex-cross-ref/output.txt new file mode 100644 index 000000000..f1b624d29 --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/output.txt @@ -0,0 +1,22 @@ +{ + "a": { + "default": { + "a_1": 500, + "a_2": 2.5 + }, + "value": { + "a_1": 500, + "a_2": 2.5 + } + }, + "b": { + "default": { + "b_1": 500, + "b_2": 2.5 + }, + "value": { + "b_1": 500, + "b_2": 2.5 + } + } +} diff --git a/acceptance/bundle/variables/complex-cross-ref/script b/acceptance/bundle/variables/complex-cross-ref/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/complex-cycle-self/databricks.yml b/acceptance/bundle/variables/complex-cycle-self/databricks.yml new file mode 100644 index 000000000..bb461795c --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle-self/output.txt b/acceptance/bundle/variables/complex-cycle-self/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle-self/script b/acceptance/bundle/variables/complex-cycle-self/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-cycle/databricks.yml b/acceptance/bundle/variables/complex-cycle/databricks.yml new file mode 100644 index 000000000..9784a4e25 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.b} + b: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle/output.txt b/acceptance/bundle/variables/complex-cycle/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle/script b/acceptance/bundle/variables/complex-cycle/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/script @@ -0,0 +1 @@ +$CLI 
bundle validate
diff --git a/acceptance/bundle/variables/complex-simple/databricks.yml b/acceptance/bundle/variables/complex-simple/databricks.yml
new file mode 100644
index 000000000..135ff86cf
--- /dev/null
+++ b/acceptance/bundle/variables/complex-simple/databricks.yml
@@ -0,0 +1,27 @@
+# This example works and properly merges resources.jobs.job1.job_clusters.new_cluster and ${var.cluster},
+# retaining num_workers and spark_version while overriding node_type_id.
+bundle:
+  name: TestResolveComplexVariable
+
+variables:
+  cluster:
+    type: "complex"
+    value:
+      node_type_id: "Standard_DS3_v2"
+      num_workers: 2
+
+resources:
+  jobs:
+    job1:
+      job_clusters:
+        - new_cluster:
+            node_type_id: "random"
+            spark_version: 13.3.x-scala2.12
+
+targets:
+  dev:
+    resources:
+      jobs:
+        job1:
+          job_clusters:
+            - new_cluster: ${var.cluster}
diff --git a/acceptance/bundle/variables/complex-simple/output.txt b/acceptance/bundle/variables/complex-simple/output.txt
new file mode 100644
index 000000000..16b0ec80f
--- /dev/null
+++ b/acceptance/bundle/variables/complex-simple/output.txt
@@ -0,0 +1,10 @@
+[
+  {
+    "job_cluster_key": "",
+    "new_cluster": {
+      "node_type_id": "Standard_DS3_v2",
+      "num_workers": 2,
+      "spark_version": "13.3.x-scala2.12"
+    }
+  }
+]
diff --git a/acceptance/bundle/variables/complex-simple/script b/acceptance/bundle/variables/complex-simple/script
new file mode 100644
index 000000000..1c31d0b40
--- /dev/null
+++ b/acceptance/bundle/variables/complex-simple/script
@@ -0,0 +1 @@
+$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters
diff --git a/acceptance/bundle/variables/complex-transitive-deep/databricks.yml b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml
new file mode 100644
index 000000000..1357c291a
--- /dev/null
+++ b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml
@@ -0,0 +1,21 @@
+bundle:
+  name: complex-transitive
+
+variables:
+  catalog:
+    default: hive_metastore
+  spark_conf_1:
+    default:
+      "spark.databricks.sql.initial.catalog.name": ${var.catalog}
+  spark_conf:
+    default: ${var.spark_conf_1}
+  etl_cluster_config:
+    type: complex
+    default:
+      spark_version: 14.3.x-scala2.12
+      runtime_engine: PHOTON
+      spark_conf: ${var.spark_conf}
+
+resources:
+  clusters:
+    my_cluster: ${var.etl_cluster_config}
diff --git a/acceptance/bundle/variables/complex-transitive-deep/output.txt b/acceptance/bundle/variables/complex-transitive-deep/output.txt
new file mode 100644
index 000000000..29c41cda5
--- /dev/null
+++ b/acceptance/bundle/variables/complex-transitive-deep/output.txt
@@ -0,0 +1,3 @@
+{
+  "spark.databricks.sql.initial.catalog.name": "hive_metastore"
+}
diff --git a/acceptance/bundle/variables/complex-transitive-deep/script b/acceptance/bundle/variables/complex-transitive-deep/script
new file mode 100644
index 000000000..52bb08ed4
--- /dev/null
+++ b/acceptance/bundle/variables/complex-transitive-deep/script
@@ -0,0 +1,2 @@
+# Verifies that a chain of references across complex variables (catalog -> spark_conf_1 -> spark_conf -> etl_cluster_config) resolves to the final value; see output.txt
+$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf'
diff --git a/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml
new file mode 100644
index 000000000..3f9bea464
--- /dev/null
+++ b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml
@@ -0,0 +1,22 @@
+bundle:
+  name: complex-transitive-deeper
+
+variables:
+  catalog_1:
+    default:
+      name: hive_metastore
+  catalog:
+    default: ${var.catalog_1}
+  spark_conf:
+ default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog.name} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deeper/output.txt b/acceptance/bundle/variables/complex-transitive-deeper/output.txt new file mode 100644 index 000000000..3bedbfb9a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/output.txt @@ -0,0 +1,7 @@ +Error: expected a map to index "variables.catalog.value.name", found string + +{ + "my_cluster": "${var.etl_cluster_config}" +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/complex-transitive-deeper/script b/acceptance/bundle/variables/complex-transitive-deeper/script new file mode 100644 index 000000000..d4fb404b1 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/script @@ -0,0 +1,2 @@ +# Currently, this errors instead of interpolating variables +$CLI bundle validate -o json | jq '.resources.clusters' diff --git a/acceptance/bundle/variables/complex-transitive/output.txt b/acceptance/bundle/variables/complex-transitive/output.txt index a031e0497..29c41cda5 100644 --- a/acceptance/bundle/variables/complex-transitive/output.txt +++ b/acceptance/bundle/variables/complex-transitive/output.txt @@ -1,3 +1,3 @@ { - "spark.databricks.sql.initial.catalog.name": "${var.catalog}" + "spark.databricks.sql.initial.catalog.name": "hive_metastore" } diff --git a/acceptance/bundle/variables/complex-with-var-reference/databricks.yml b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml new file mode 100644 index 000000000..104f9a470 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: TestResolveComplexVariableWithVarReference + +variables: + package_version: + default: "1.0.0" + cluster_libraries: + type: "complex" + default: + - pypi: + package: "cicd_template==${var.package_version}" + +resources: + jobs: + job1: + tasks: + - libraries: ${var.cluster_libraries} diff --git a/acceptance/bundle/variables/complex-with-var-reference/output.txt b/acceptance/bundle/variables/complex-with-var-reference/output.txt new file mode 100644 index 000000000..a5b792ac4 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/output.txt @@ -0,0 +1,12 @@ +[ + { + "libraries": [ + { + "pypi": { + "package": "cicd_template==1.0.0" + } + } + ], + "task_key": "" + } +] diff --git a/acceptance/bundle/variables/complex-with-var-reference/script b/acceptance/bundle/variables/complex-with-var-reference/script new file mode 100644 index 000000000..0f7353ad1 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.tasks diff --git a/acceptance/bundle/variables/complex-within-complex/databricks.yml b/acceptance/bundle/variables/complex-within-complex/databricks.yml new file mode 100644 index 000000000..f1d77289e --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/databricks.yml @@ -0,0 +1,34 @@ +# Does not work currently, explicitly disabled, even though it works if you remove 'type: "complex"' lines +# Also fails to merge clusters. 
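+# See output.txt: the merge produces an "unknown field: node_type_id" warning and new_cluster ends up coming entirely from ${var.cluster}.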
+bundle: + name: TestResolveComplexVariableReferencesWithComplexVariablesError + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + spark_conf: "${var.spark_conf}" + spark_conf: + type: "complex" + value: + spark.executor.memory: "4g" + spark.executor.cores: "2" + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: + node_type_id: "random" + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-within-complex/output.txt b/acceptance/bundle/variables/complex-within-complex/output.txt new file mode 100644 index 000000000..72e6ef69a --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/output.txt @@ -0,0 +1,17 @@ +Warning: unknown field: node_type_id + at resources.jobs.job1.job_clusters[0] + in databricks.yml:25:11 + +[ + { + "job_cluster_key": "my_cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_conf": { + "spark.executor.cores": "2", + "spark.executor.memory": "4g" + } + } + } +] diff --git a/acceptance/bundle/variables/complex-within-complex/script b/acceptance/bundle/variables/complex-within-complex/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex/out.default.json b/acceptance/bundle/variables/complex/out.default.json index 6454562a6..a1ccd52bc 100644 --- a/acceptance/bundle/variables/complex/out.default.json +++ b/acceptance/bundle/variables/complex/out.default.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/default/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/default/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex/out.dev.json b/acceptance/bundle/variables/complex/out.dev.json index cede5feb2..bb939091b 100644 --- a/acceptance/bundle/variables/complex/out.dev.json +++ b/acceptance/bundle/variables/complex/out.dev.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex_multiple_files/output.txt b/acceptance/bundle/variables/complex_multiple_files/output.txt index e87b8df11..ec2cad1ce 100644 --- a/acceptance/bundle/variables/complex_multiple_files/output.txt +++ b/acceptance/bundle/variables/complex_multiple_files/output.txt @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables-multiple-files/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables-multiple-files/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/cycle/databricks.yml b/acceptance/bundle/variables/cycle/databricks.yml new file mode 100644 index 000000000..b35196671 --- 
/dev/null +++ b/acceptance/bundle/variables/cycle/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: cycle + +variables: + a: + default: ${var.b} + b: + default: ${var.a} diff --git a/acceptance/bundle/variables/cycle/output.txt b/acceptance/bundle/variables/cycle/output.txt new file mode 100644 index 000000000..ea9c95cd4 --- /dev/null +++ b/acceptance/bundle/variables/cycle/output.txt @@ -0,0 +1,14 @@ +Error: cycle detected in field resolution: variables.a.default -> var.b -> var.a -> var.b + +{ + "a": { + "default": "${var.b}", + "value": "${var.b}" + }, + "b": { + "default": "${var.a}", + "value": "${var.a}" + } +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/cycle/script b/acceptance/bundle/variables/cycle/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/cycle/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/empty/output.txt b/acceptance/bundle/variables/empty/output.txt index c3f0af130..8933443df 100644 --- a/acceptance/bundle/variables/empty/output.txt +++ b/acceptance/bundle/variables/empty/output.txt @@ -1,10 +1,10 @@ -Error: no value assigned to required variable a. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_a environment variable +Error: no value assigned to required variable a. Assignment can be done using "--var", by setting the BUNDLE_VAR_a environment variable, or in .databricks/bundle//variable-overrides.json file Name: empty${var.a} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/empty${var.a}/default + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/empty${var.a}/default Found 1 error diff --git a/acceptance/bundle/variables/env_overrides/output.txt b/acceptance/bundle/variables/env_overrides/output.txt index e8fb99938..1ee9ef625 100644 --- a/acceptance/bundle/variables/env_overrides/output.txt +++ b/acceptance/bundle/variables/env_overrides/output.txt @@ -9,13 +9,13 @@ "prod-a env-var-b" >>> errcode $CLI bundle validate -t env-missing-a-required-variable-assignment -Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable +Error: no value assigned to required variable b. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file Name: test bundle Target: env-missing-a-required-variable-assignment Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/test bundle/env-missing-a-required-variable-assignment + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test bundle/env-missing-a-required-variable-assignment Found 1 error diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json new file mode 100644 index 000000000..602567a68 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/complex_to_string/variable-overrides.json @@ -0,0 +1,5 @@ +{ + "cluster_key": { + "node_type_id": "Standard_DS3_v2" + } +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json new file mode 100644 index 000000000..3a865e120 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/default/variable-overrides.json @@ -0,0 +1,7 @@ +{ + "cluster": { + "node_type_id": "Standard_DS3_v2" + }, + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json new file mode 100644 index 000000000..257cc5642 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/invalid_json/variable-overrides.json @@ -0,0 +1 @@ +foo diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json new file mode 100644 index 000000000..1ea719446 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/string_to_complex/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster": "mlops_stacks-cluster" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json new file mode 100644 index 000000000..686d68548 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/with_value/variable-overrides.json @@ -0,0 +1,3 @@ +{ + "cluster_key": "mlops_stacks-cluster-from-file" +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json new file mode 100644 index 000000000..86166408e --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.databricks/bundle/without_defaults/variable-overrides.json @@ -0,0 +1,4 @@ +{ + "cluster_key": "mlops_stacks-cluster", + "cluster_workers": 2 +} diff --git a/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json new file mode 100644 index 000000000..de140ba36 --- /dev/null +++ 
b/acceptance/bundle/variables/file-defaults/.databricks/bundle/wrong_file_structure/variable-overrides.json @@ -0,0 +1,3 @@ +[ + "foo" +] diff --git a/acceptance/bundle/variables/file-defaults/.gitignore b/acceptance/bundle/variables/file-defaults/.gitignore new file mode 100644 index 000000000..bd1711fd1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/.gitignore @@ -0,0 +1 @@ +!.databricks diff --git a/acceptance/bundle/variables/file-defaults/databricks.yml b/acceptance/bundle/variables/file-defaults/databricks.yml new file mode 100644 index 000000000..5838843e1 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/databricks.yml @@ -0,0 +1,53 @@ +bundle: + name: TestResolveVariablesFromFile + +variables: + cluster: + type: "complex" + cluster_key: + cluster_workers: + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: ${var.cluster_key} + new_cluster: + node_type_id: "${var.cluster.node_type_id}" + num_workers: ${var.cluster_workers} + +targets: + default: + default: true + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + without_defaults: + + complex_to_string: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + string_to_complex: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: "default" + + wrong_file_structure: + + invalid_json: + + with_value: + variables: + cluster_workers: 1 + cluster: + node_type_id: "default" + cluster_key: cluster_key_value diff --git a/acceptance/bundle/variables/file-defaults/output.txt b/acceptance/bundle/variables/file-defaults/output.txt new file mode 100644 index 000000000..73830aae3 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/output.txt @@ -0,0 +1,82 @@ + +=== variable file +>>> $CLI bundle validate -o json +{ + "job_cluster_key": "mlops_stacks-cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and variable flag +>>> $CLI bundle validate -o json --var=cluster_key=mlops_stacks-cluster-overriden +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable file and environment variable +>>> BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json +{ + "job_cluster_key": "mlops_stacks-cluster-overriden", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2 + } +} + +=== variable has value in config file +>>> $CLI bundle validate -o json --target with_value +{ + "job_cluster_key": "mlops_stacks-cluster-from-file", + "new_cluster": { + "node_type_id": "default", + "num_workers": 1 + } +} + +=== file has variable that is complex but default is string +>>> errcode $CLI bundle validate -o json --target complex_to_string +Error: variable cluster_key is not of type complex, but the value in the variable file is a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} + +=== file has variable that is string but default is complex +>>> errcode $CLI bundle validate -o json --target string_to_complex +Error: variable cluster is of type complex, but the value in the variable file is not a complex type + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": 
"${var.cluster_workers}" + } +} + +=== variable is required but it's not provided in the file +>>> errcode $CLI bundle validate -o json --target without_defaults +Error: no value assigned to required variable cluster. Assignment can be done using "--var", by setting the BUNDLE_VAR_cluster environment variable, or in .databricks/bundle//variable-overrides.json file + + +Exit code: 1 +{ + "job_cluster_key": "${var.cluster_key}", + "new_cluster": { + "node_type_id": "${var.cluster.node_type_id}", + "num_workers": "${var.cluster_workers}" + } +} diff --git a/acceptance/bundle/variables/file-defaults/script b/acceptance/bundle/variables/file-defaults/script new file mode 100644 index 000000000..c5b208755 --- /dev/null +++ b/acceptance/bundle/variables/file-defaults/script @@ -0,0 +1,30 @@ +cluster_expr=".resources.jobs.job1.job_clusters[0]" + +# defaults from variable file, see .databricks/bundle//variable-overrides.json + +title "variable file" +trace $CLI bundle validate -o json | jq $cluster_expr + +title "variable file and variable flag" +trace $CLI bundle validate -o json --var="cluster_key=mlops_stacks-cluster-overriden" | jq $cluster_expr + +title "variable file and environment variable" +trace BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json | jq $cluster_expr + +title "variable has value in config file" +trace $CLI bundle validate -o json --target with_value | jq $cluster_expr + +# title "file cannot be parsed" +# trace errcode $CLI bundle validate -o json --target invalid_json | jq $cluster_expr + +# title "file has wrong structure" +# trace errcode $CLI bundle validate -o json --target wrong_file_structure | jq $cluster_expr + +title "file has variable that is complex but default is string" +trace errcode $CLI bundle validate -o json --target complex_to_string | jq $cluster_expr + +title "file has variable that is string but default is complex" +trace errcode $CLI bundle validate -o json --target string_to_complex | jq $cluster_expr + +title "variable is required but it's not provided in the file" +trace errcode $CLI bundle validate -o json --target without_defaults | jq $cluster_expr diff --git a/acceptance/bundle/variables/git-branch/databricks.yml b/acceptance/bundle/variables/git-branch/databricks.yml new file mode 100644 index 000000000..7cf210722 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: git + git: + # This is currently not supported + branch: ${var.deployment_branch} + +variables: + deployment_branch: + # By setting deployment_branch to "" we set bundle.git.branch to "" which is the same unsetting it. + # This this should make CLI read branch from git and update bundle.git.branch accordingly. It should + # Also set bundle.git.inferred to true. + default: "" + +targets: + prod: + default: true + dev: + variables: + deployment_branch: dev-branch diff --git a/acceptance/bundle/variables/git-branch/output.txt b/acceptance/bundle/variables/git-branch/output.txt new file mode 100644 index 000000000..d6d824394 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/output.txt @@ -0,0 +1,98 @@ + +>>> $CLI bundle validate -o json +{ + "bundle": { + "environment": "prod", + "git": { + "actual_branch": "main", + "branch": "", + "bundle_root_path": ".", + }, + "name": "git", + "target": "prod", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "." 
+ ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "", + "value": "" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/prod", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/state" + } +} + +>>> $CLI bundle validate +Name: git +Target: prod +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/prod + +Validation OK! + +>>> $CLI bundle validate -o json -t dev +{ + "bundle": { + "environment": "dev", + "git": { + "actual_branch": "main", + "branch": "dev-branch", + "bundle_root_path": ".", + }, + "name": "git", + "target": "dev", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "dev-branch", + "value": "dev-branch" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/dev", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/state" + } +} + +>>> $CLI bundle validate -t dev +Name: git +Target: dev +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/dev + +Validation OK! diff --git a/acceptance/bundle/variables/git-branch/script b/acceptance/bundle/variables/git-branch/script new file mode 100644 index 000000000..aed881f1f --- /dev/null +++ b/acceptance/bundle/variables/git-branch/script @@ -0,0 +1,6 @@ +git-repo-init +trace $CLI bundle validate -o json | grep -v '"commit"' +trace $CLI bundle validate +trace $CLI bundle validate -o json -t dev | grep -v '"commit"' +trace $CLI bundle validate -t dev | grep -v '"commit"' +rm -fr .git diff --git a/acceptance/bundle/variables/host/databricks.yml b/acceptance/bundle/variables/host/databricks.yml new file mode 100644 index 000000000..b25020a1f --- /dev/null +++ b/acceptance/bundle/variables/host/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: host + +variables: + host: + default: https://nonexistent123.staging.cloud.databricks.com + +workspace: + # This is currently not supported + host: ${var.host} diff --git a/acceptance/bundle/variables/host/output.txt b/acceptance/bundle/variables/host/output.txt new file mode 100644 index 000000000..89342908c --- /dev/null +++ b/acceptance/bundle/variables/host/output.txt @@ -0,0 +1,38 @@ + +>>> errcode $CLI bundle validate -o json +Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name + +{ + "bundle": { + "environment": "default", + "name": "host", + "target": "default" + }, + "sync": { + "paths": [ + "." 
+ ] + }, + "targets": null, + "variables": { + "host": { + "default": "https://nonexistent123.staging.cloud.databricks.com" + } + }, + "workspace": { + "host": "${var.host}" + } +} +Exit code: 1 + +>>> errcode $CLI bundle validate +Error: failed during request visitor: parse "https://${var.host}": invalid character "{" in host name + +Name: host +Target: default +Workspace: + Host: ${var.host} + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/host/script b/acceptance/bundle/variables/host/script new file mode 100644 index 000000000..90e083627 --- /dev/null +++ b/acceptance/bundle/variables/host/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate -o json +trace errcode $CLI bundle validate diff --git a/acceptance/bundle/variables/prepend-workspace-var/databricks.yml b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml new file mode 100644 index 000000000..c843752f8 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/databricks.yml @@ -0,0 +1,24 @@ +workspace: + profile: profile_name + root_path: ${var.workspace_root}/path/to/root + +variables: + workspace_root: + description: "root directory in the Databricks workspace to store the asset bundle and associated artifacts" + default: /Users/${workspace.current_user.userName} + +targets: + dev: + default: true + prod: + variables: + workspace_root: /Shared + +resources: + jobs: + my_job: + tasks: + - existing_cluster_id: 500 + python_wheel_task: + named_parameters: + conf-file: "${workspace.file_path}/path/to/config.yaml" diff --git a/acceptance/bundle/variables/prepend-workspace-var/output.txt b/acceptance/bundle/variables/prepend-workspace-var/output.txt new file mode 100644 index 000000000..575fac6d4 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/output.txt @@ -0,0 +1,67 @@ +/Workspace should be prepended on all paths, but it is not the case: +{ + "bundle": { + "environment": "dev", + "git": { + "bundle_root_path": ".", + "inferred": true + }, + "target": "dev", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "resources": { + "jobs": { + "my_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Users/$USERNAME/path/to/root/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "existing_cluster_id": "500", + "python_wheel_task": { + "named_parameters": { + "conf-file": "/Users/$USERNAME/path/to/root/files/path/to/config.yaml" + } + }, + "task_key": "" + } + ] + } + } + }, + "sync": { + "paths": [ + "." 
+ ] + }, + "targets": null, + "variables": { + "workspace_root": { + "default": "/Users/$USERNAME", + "description": "root directory in the Databricks workspace to store the asset bundle and associated artifacts", + "value": "/Users/$USERNAME" + } + }, + "workspace": { + "artifact_path": "/Users/$USERNAME/path/to/root/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Users/$USERNAME/path/to/root/files", + "profile": "profile_name", + "resource_path": "/Users/$USERNAME/path/to/root/resources", + "root_path": "/Users/$USERNAME/path/to/root", + "state_path": "/Users/$USERNAME/path/to/root/state" + } +} \ No newline at end of file diff --git a/acceptance/bundle/variables/prepend-workspace-var/script b/acceptance/bundle/variables/prepend-workspace-var/script new file mode 100644 index 000000000..de6bc8a17 --- /dev/null +++ b/acceptance/bundle/variables/prepend-workspace-var/script @@ -0,0 +1,2 @@ +echo /Workspace should be prepended on all paths, but it is not the case: #2181 +$CLI bundle validate -o json diff --git a/acceptance/bundle/variables/resolve-builtin/output.txt b/acceptance/bundle/variables/resolve-builtin/output.txt index 2f58abd8a..f060c472e 100644 --- a/acceptance/bundle/variables/resolve-builtin/output.txt +++ b/acceptance/bundle/variables/resolve-builtin/output.txt @@ -1,8 +1,8 @@ { "artifact_path": "TestResolveVariableReferences/bar/artifacts", "current_user": { - "short_name": "tester", - "userName": "tester@databricks.com" + "short_name": "$USERNAME", + "userName": "$USERNAME" }, "file_path": "TestResolveVariableReferences/bar/baz", "resource_path": "TestResolveVariableReferences/bar/resources", diff --git a/acceptance/bundle/variables/resolve-nonstrings/databricks.yml b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml new file mode 100644 index 000000000..a02c78a7e --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml @@ -0,0 +1,23 @@ +bundle: + name: TestResolveVariableReferencesForPrimitiveNonStringFields + +variables: + no_alert_for_canceled_runs: {} + no_alert_for_skipped_runs: {} + min_workers: {} + max_workers: {} + spot_bid_max_price: {} + +resources: + jobs: + job1: + notification_settings: + no_alert_for_canceled_runs: ${var.no_alert_for_canceled_runs} + no_alert_for_skipped_runs: ${var.no_alert_for_skipped_runs} + tasks: + - new_cluster: + autoscale: + min_workers: ${var.min_workers} + max_workers: ${var.max_workers} + azure_attributes: + spot_bid_max_price: ${var.spot_bid_max_price} diff --git a/acceptance/bundle/variables/resolve-nonstrings/output.txt b/acceptance/bundle/variables/resolve-nonstrings/output.txt new file mode 100644 index 000000000..3a1eb9c47 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/output.txt @@ -0,0 +1,52 @@ +{ + "variables": { + "max_workers": { + "value": "2" + }, + "min_workers": { + "value": "1" + }, + "no_alert_for_canceled_runs": { + "value": "true" + }, + "no_alert_for_skipped_runs": { + "value": "false" + }, + "spot_bid_max_price": { + "value": "0.5" + } + }, + "jobs": { + "job1": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/TestResolveVariableReferencesForPrimitiveNonStringFields/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "notification_settings": { + "no_alert_for_canceled_runs": true, + "no_alert_for_skipped_runs": false + }, + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { 
+ "new_cluster": { + "autoscale": { + "max_workers": 2, + "min_workers": 1 + }, + "azure_attributes": { + "spot_bid_max_price": 0.5 + } + }, + "task_key": "" + } + ] + } + } +} diff --git a/acceptance/bundle/variables/resolve-nonstrings/script b/acceptance/bundle/variables/resolve-nonstrings/script new file mode 100644 index 000000000..cb9e45b61 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/script @@ -0,0 +1,4 @@ +export BUNDLE_VAR_no_alert_for_skipped_runs=false +export BUNDLE_VAR_max_workers=2 +export BUNDLE_VAR_min_workers=3 # shadowed by --var below +$CLI bundle validate -o json --var no_alert_for_canceled_runs=true --var min_workers=1 --var spot_bid_max_price=0.5 | jq '{ variables, jobs: .resources.jobs }' diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml new file mode 100644 index 000000000..6a45de330 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml @@ -0,0 +1,9 @@ +bundle: + name: TestResolveVariableReferencesToBundleVariables + +workspace: + root_path: "${bundle.name}/${var.foo}" + +variables: + foo: + value: "bar" diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt new file mode 100644 index 000000000..c56fbe415 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt @@ -0,0 +1,11 @@ +{ + "artifact_path": "TestResolveVariableReferencesToBundleVariables/bar/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "TestResolveVariableReferencesToBundleVariables/bar/files", + "resource_path": "TestResolveVariableReferencesToBundleVariables/bar/resources", + "root_path": "TestResolveVariableReferencesToBundleVariables/bar", + "state_path": "TestResolveVariableReferencesToBundleVariables/bar/state" +} diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/script b/acceptance/bundle/variables/resolve-vars-in-root-path/script new file mode 100644 index 000000000..fefd9abe6 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .workspace diff --git a/acceptance/bundle/variables/vanilla/output.txt b/acceptance/bundle/variables/vanilla/output.txt index 69b358a3f..e98882bb0 100644 --- a/acceptance/bundle/variables/vanilla/output.txt +++ b/acceptance/bundle/variables/vanilla/output.txt @@ -3,13 +3,13 @@ "abc def" >>> errcode $CLI bundle validate -Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable +Error: no value assigned to required variable b. 
Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle//variable-overrides.json file Name: ${var.a} ${var.b} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/${var.a} ${var.b}/default + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/${var.a} ${var.b}/default Found 1 error diff --git a/acceptance/bundle/variables/variable_overrides_in_target/output.txt b/acceptance/bundle/variables/variable_overrides_in_target/output.txt index de193f5b6..8998b691d 100644 --- a/acceptance/bundle/variables/variable_overrides_in_target/output.txt +++ b/acceptance/bundle/variables/variable_overrides_in_target/output.txt @@ -12,7 +12,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/use-default-variable-values/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/use-default-variable-values/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -33,7 +33,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-string-variable/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-string-variable/state/metadata.json" }, "name": "overridden_string", "permissions": [] @@ -54,7 +54,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-int-variable/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-int-variable/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -75,7 +75,7 @@ "continuous": false, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" }, "name": "overridden_string", "permissions": [] diff --git a/acceptance/cmd_server_test.go b/acceptance/cmd_server_test.go new file mode 100644 index 000000000..28feec1bd --- /dev/null +++ b/acceptance/cmd_server_test.go @@ -0,0 +1,73 @@ +package acceptance_test + +import ( + "encoding/json" + "net/http" + "os" + "strings" + "testing" + + "github.com/databricks/cli/internal/testcli" + "github.com/stretchr/testify/require" +) + +func StartCmdServer(t *testing.T) *TestServer { + server := StartServer(t) + server.Handle("/", func(r *http.Request) (any, error) { + q := r.URL.Query() + args := strings.Split(q.Get("args"), " ") + + var env map[string]string + require.NoError(t, json.Unmarshal([]byte(q.Get("env")), &env)) + + for key, val := range env { + defer Setenv(t, key, val)() + } + + defer Chdir(t, q.Get("cwd"))() + + c := testcli.NewRunner(t, r.Context(), args...) + c.Verbose = false + stdout, stderr, err := c.Run() + result := map[string]any{ + "stdout": stdout.String(), + "stderr": stderr.String(), + } + exitcode := 0 + if err != nil { + exitcode = 1 + } + result["exitcode"] = exitcode + return result, nil + }) + return server +} + +// Chdir variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike testutil.Chdir which switches back only when tests end. 
+func Chdir(t *testing.T, cwd string) func() { + require.NotEmpty(t, cwd) + prevDir, err := os.Getwd() + require.NoError(t, err) + err = os.Chdir(cwd) + require.NoError(t, err) + return func() { + _ = os.Chdir(prevDir) + } +} + +// Setenv variant that is intended to be used with defer so that it can switch back before function ends. +// This is unlike t.Setenv which switches back only when tests end. +func Setenv(t *testing.T, key, value string) func() { + prevVal, exists := os.LookupEnv(key) + + require.NoError(t, os.Setenv(key, value)) + + return func() { + if exists { + _ = os.Setenv(key, prevVal) + } else { + _ = os.Unsetenv(key) + } + } +} diff --git a/acceptance/config_test.go b/acceptance/config_test.go new file mode 100644 index 000000000..41866c4a7 --- /dev/null +++ b/acceptance/config_test.go @@ -0,0 +1,104 @@ +package acceptance_test + +import ( + "os" + "path/filepath" + "sync" + "testing" + + "github.com/BurntSushi/toml" + "github.com/databricks/cli/libs/testdiff" + "github.com/stretchr/testify/require" +) + +const configFilename = "test.toml" + +var ( + configCache map[string]TestConfig + configMutex sync.Mutex +) + +type TestConfig struct { + // Place to describe what's wrong with this test. Does not affect how the test is run. + Badness string + + // Which OSes the test is enabled on. Each string is compared against runtime.GOOS. + // If absent, default to true. + GOOS map[string]bool + + // List of additional replacements to apply on this test. + // Old is a regexp, New is a replacement expression. + Repls []testdiff.Replacement +} + +// FindConfig finds the closest config file. +func FindConfig(t *testing.T, dir string) (string, bool) { + shared := false + for { + path := filepath.Join(dir, configFilename) + _, err := os.Stat(path) + + if err == nil { + return path, shared + } + + shared = true + + if dir == "" || dir == "." { + break + } + + if os.IsNotExist(err) { + dir = filepath.Dir(dir) + continue + } + + t.Fatalf("Error while reading %s: %s", path, err) + } + + t.Fatal("Config not found: " + configFilename) + return "", shared +} + +// LoadConfig loads the config file. Non-leaf configs are cached. +func LoadConfig(t *testing.T, dir string) (TestConfig, string) { + path, leafConfig := FindConfig(t, dir) + + if leafConfig { + return DoLoadConfig(t, path), path + } + + configMutex.Lock() + defer configMutex.Unlock() + + if configCache == nil { + configCache = make(map[string]TestConfig) + } + + result, ok := configCache[path] + if ok { + return result, path + } + + result = DoLoadConfig(t, path) + configCache[path] = result + return result, path +} + +func DoLoadConfig(t *testing.T, path string) TestConfig { + bytes, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read config: %s", err) + } + + var config TestConfig + meta, err := toml.Decode(string(bytes), &config) + require.NoError(t, err) + + keys := meta.Undecoded() + if len(keys) > 0 { + t.Fatalf("Undecoded keys in %s: %#v", path, keys) + } + + return config +} diff --git a/acceptance/help/output.txt b/acceptance/help/output.txt index ed4a88ce6..18434251d 100644 --- a/acceptance/help/output.txt +++ b/acceptance/help/output.txt @@ -115,7 +115,6 @@ Marketplace Apps apps Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on. 
- apps Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on. Clean Rooms clean-room-assets Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room. diff --git a/acceptance/script.prepare b/acceptance/script.prepare index 3f1bb2acc..b814a1260 100644 --- a/acceptance/script.prepare +++ b/acceptance/script.prepare @@ -1,6 +1,3 @@ -# Prevent CLI from downloading terraform in each test: -export DATABRICKS_TF_EXEC_PATH=/tmp/ - errcode() { # Temporarily disable 'set -e' to prevent the script from exiting on error set +e @@ -9,7 +6,9 @@ errcode() { local exit_code=$? # Re-enable 'set -e' if it was previously set set -e - >&2 printf "\nExit code: $exit_code\n" + if [ $exit_code -ne 0 ]; then + >&2 printf "\nExit code: $exit_code\n" + fi } trace() { @@ -34,3 +33,28 @@ trace() { return $? } + +git-repo-init() { + git init -qb main + git config core.autocrlf false + git config user.name "Tester" + git config user.email "tester@databricks.com" + git add databricks.yml + git commit -qm 'Add databricks.yml' +} + +title() { + local label="$1" + printf "\n=== %s" "$label" +} + +withdir() { + local dir="$1" + shift + local orig_dir="$(pwd)" + cd "$dir" || return $? + "$@" + local exit_code=$? + cd "$orig_dir" || return $? + return $exit_code +} diff --git a/acceptance/selftest/out.hello.txt b/acceptance/selftest/out.hello.txt new file mode 100644 index 000000000..e427984d4 --- /dev/null +++ b/acceptance/selftest/out.hello.txt @@ -0,0 +1 @@ +HELLO diff --git a/acceptance/selftest/output.txt b/acceptance/selftest/output.txt new file mode 100644 index 000000000..9fdfbc1e7 --- /dev/null +++ b/acceptance/selftest/output.txt @@ -0,0 +1,35 @@ +=== Capturing STDERR +>>> python3 -c import sys; sys.stderr.write("STDERR\n") +STDERR + +=== Capturing STDOUT +>>> python3 -c import sys; sys.stderr.write("STDOUT\n") +STDOUT + +=== Capturing exit code +>>> errcode python3 -c raise SystemExit(5) + +Exit code: 5 + +=== Capturing exit code (alt) +>>> python3 -c raise SystemExit(7) + +Exit code: 7 + +=== Capturing pwd +>>> python3 -c import os; print(os.getcwd()) +$TMPDIR + +=== Capturing subdir +>>> mkdir -p subdir/a/b/c + +>>> withdir subdir/a/b/c python3 -c import os; print(os.getcwd()) +$TMPDIR/subdir/a/b/c + +=== Custom output files - everything starting with out is captured and compared +>>> echo HELLO + +=== Custom regex can be specified in [[Repl]] section +1234 +CUSTOM_NUMBER_REGEX +123456 diff --git a/acceptance/selftest/script b/acceptance/selftest/script new file mode 100644 index 000000000..665726167 --- /dev/null +++ b/acceptance/selftest/script @@ -0,0 +1,26 @@ +printf "=== Capturing STDERR" +trace python3 -c 'import sys; sys.stderr.write("STDERR\n")' + +printf "\n=== Capturing STDOUT" +trace python3 -c 'import sys; sys.stderr.write("STDOUT\n")' + +printf "\n=== Capturing exit code" +trace errcode python3 -c 'raise SystemExit(5)' + +printf "\n=== Capturing exit code (alt)" +errcode trace python3 -c 'raise SystemExit(7)' + +printf "\n=== Capturing pwd" +trace python3 -c 'import os; print(os.getcwd())' + +printf "\n=== Capturing subdir" +trace mkdir -p subdir/a/b/c +trace withdir subdir/a/b/c python3 -c 'import os; print(os.getcwd())' + +printf "\n=== Custom output files - everything starting with out is captured and compared" +trace echo HELLO > out.hello.txt + +printf "\n=== Custom regex can be specified in [[Repl]] section\n" +echo 1234 
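+# only 12345 matches the five-digit [[Repls]] pattern in test.toml and therefore shows up as CUSTOM_NUMBER_REGEX in output.txt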
+echo 12345 +echo 123456 diff --git a/acceptance/selftest/test.toml b/acceptance/selftest/test.toml new file mode 100644 index 000000000..9607ec5df --- /dev/null +++ b/acceptance/selftest/test.toml @@ -0,0 +1,20 @@ +# Badness = "Brief description of what's wrong with the test output, if anything" + +#[GOOS] +# Disable on Windows +#windows = false + +# Disable on Mac +#mac = false + +# Disable on Linux +#linux = false + +[[Repls]] +Old = '\b[0-9]{5}\b' +New = "CUSTOM_NUMBER_REGEX" + +[[Repls]] +# Fix path with reverse slashes in the output for Windows. +Old = '\$TMPDIR\\subdir\\a\\b\\c' +New = '$$TMPDIR/subdir/a/b/c' diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 7b21e198f..eb8cbb24a 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -2,11 +2,11 @@ package acceptance_test import ( "encoding/json" - "net" "net/http" "net/http/httptest" "testing" + "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/workspace" @@ -14,8 +14,7 @@ import ( type TestServer struct { *httptest.Server - Mux *http.ServeMux - Port int + Mux *http.ServeMux } type HandlerFunc func(r *http.Request) (any, error) @@ -23,12 +22,10 @@ type HandlerFunc func(r *http.Request) (any, error) func NewTestServer() *TestServer { mux := http.NewServeMux() server := httptest.NewServer(mux) - port := server.Listener.Addr().(*net.TCPAddr).Port return &TestServer{ Server: server, Mux: mux, - Port: port, } } @@ -71,7 +68,7 @@ func StartServer(t *testing.T) *TestServer { } func AddHandlers(server *TestServer) { - server.Handle("/api/2.0/policies/clusters/list", func(r *http.Request) (any, error) { + server.Handle("GET /api/2.0/policies/clusters/list", func(r *http.Request) (any, error) { return compute.ListPoliciesResponse{ Policies: []compute.Policy{ { @@ -86,7 +83,7 @@ func AddHandlers(server *TestServer) { }, nil }) - server.Handle("/api/2.0/instance-pools/list", func(r *http.Request) (any, error) { + server.Handle("GET /api/2.0/instance-pools/list", func(r *http.Request) (any, error) { return compute.ListInstancePools{ InstancePools: []compute.InstancePoolAndStats{ { @@ -97,7 +94,7 @@ func AddHandlers(server *TestServer) { }, nil }) - server.Handle("/api/2.1/clusters/list", func(r *http.Request) (any, error) { + server.Handle("GET /api/2.1/clusters/list", func(r *http.Request) (any, error) { return compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -112,13 +109,13 @@ func AddHandlers(server *TestServer) { }, nil }) - server.Handle("/api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) { + server.Handle("GET /api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) { return iam.User{ UserName: "tester@databricks.com", }, nil }) - server.Handle("/api/2.0/workspace/get-status", func(r *http.Request) (any, error) { + server.Handle("GET /api/2.0/workspace/get-status", func(r *http.Request) (any, error) { return workspace.ObjectInfo{ ObjectId: 1001, ObjectType: "DIRECTORY", @@ -126,4 +123,27 @@ func AddHandlers(server *TestServer) { ResourceId: "1001", }, nil }) + + server.Handle("GET /api/2.1/unity-catalog/current-metastore-assignment", func(r *http.Request) (any, error) { + return catalog.MetastoreAssignment{ + DefaultCatalogName: "main", + }, nil + }) + + server.Handle("GET /api/2.0/permissions/directories/1001", func(r *http.Request) (any, error) { + return 
workspace.WorkspaceObjectPermissions{
+			ObjectId:   "1001",
+			ObjectType: "DIRECTORY",
+			AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{
+				{
+					UserName: "tester@databricks.com",
+					AllPermissions: []workspace.WorkspaceObjectPermission{
+						{
+							PermissionLevel: "CAN_MANAGE",
+						},
+					},
+				},
+			},
+		}, nil
+	})
 }
diff --git a/acceptance/test.toml b/acceptance/test.toml
new file mode 100644
index 000000000..eee94d0ea
--- /dev/null
+++ b/acceptance/test.toml
@@ -0,0 +1,2 @@
+# If neither the test directory nor any of its parent directories has a test.toml, this file serves as the fallback configuration.
+# The configurations are not merged across parents; the closest one is used fully.
diff --git a/bundle/apps/interpolate_variables.go b/bundle/apps/interpolate_variables.go
new file mode 100644
index 000000000..f88e7e9db
--- /dev/null
+++ b/bundle/apps/interpolate_variables.go
@@ -0,0 +1,50 @@
+package apps
+
+import (
+	"context"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+	"github.com/databricks/cli/libs/dyn/dynvar"
+)
+
+type interpolateVariables struct{}
+
+func (i *interpolateVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	pattern := dyn.NewPattern(
+		dyn.Key("resources"),
+		dyn.Key("apps"),
+		dyn.AnyKey(),
+		dyn.Key("config"),
+	)
+
+	tfToConfigMap := map[string]string{}
+	for k, r := range config.SupportedResources() {
+		tfToConfigMap[r.TerraformResourceName] = k
+	}
+
+	err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) {
+		return dyn.MapByPattern(root, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
+			return dynvar.Resolve(v, func(path dyn.Path) (dyn.Value, error) {
+				key, ok := tfToConfigMap[path[0].Key()]
+				if ok {
+					path = dyn.NewPath(dyn.Key("resources"), dyn.Key(key)).Append(path[1:]...)
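+					// e.g. a reference like ${databricks_job.my_job.id} is now looked up as resources.jobs.my_job.id (see interpolate_variables_test.go)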
+				}
+
+				return dyn.GetByPath(root, path)
+			})
+		})
+	})
+
+	return diag.FromErr(err)
+}
+
+func (i *interpolateVariables) Name() string {
+	return "apps.InterpolateVariables"
+}
+
+func InterpolateVariables() bundle.Mutator {
+	return &interpolateVariables{}
+}
diff --git a/bundle/apps/interpolate_variables_test.go b/bundle/apps/interpolate_variables_test.go
new file mode 100644
index 000000000..b6c424a95
--- /dev/null
+++ b/bundle/apps/interpolate_variables_test.go
@@ -0,0 +1,49 @@
+package apps
+
+import (
+	"context"
+	"testing"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config"
+	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/databricks-sdk-go/service/apps"
+	"github.com/stretchr/testify/require"
+)
+
+func TestAppInterpolateVariables(t *testing.T) {
+	b := &bundle.Bundle{
+		Config: config.Root{
+			Resources: config.Resources{
+				Apps: map[string]*resources.App{
+					"my_app_1": {
+						App: &apps.App{
+							Name: "my_app_1",
+						},
+						Config: map[string]any{
+							"command": []string{"echo", "hello"},
+							"env": []map[string]string{
+								{"name": "JOB_ID", "value": "${databricks_job.my_job.id}"},
+							},
+						},
+					},
+					"my_app_2": {
+						App: &apps.App{
+							Name: "my_app_2",
+						},
+					},
+				},
+				Jobs: map[string]*resources.Job{
+					"my_job": {
+						ID: "123",
+					},
+				},
+			},
+		},
+	}
+
+	diags := bundle.Apply(context.Background(), b, InterpolateVariables())
+	require.Empty(t, diags)
+	require.Equal(t, []any{map[string]any{"name": "JOB_ID", "value": "123"}}, b.Config.Resources.Apps["my_app_1"].Config["env"])
+	require.Nil(t, b.Config.Resources.Apps["my_app_2"].Config)
+}
diff --git a/bundle/apps/slow_deploy_message.go b/bundle/apps/slow_deploy_message.go
new file mode 100644
index 000000000..87275980a
--- /dev/null
+++ b/bundle/apps/slow_deploy_message.go
@@ -0,0 +1,29 @@
+package apps
+
+import (
+	"context"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/cli/libs/diag"
+)
+
+type slowDeployMessage struct{}
+
+// TODO: needs to be removed when no_compute option becomes available in TF provider and used in DABs
+// See https://github.com/databricks/cli/pull/2144
+func (v *slowDeployMessage) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	if len(b.Config.Resources.Apps) > 0 {
+		cmdio.LogString(ctx, "Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning.")
+	}
+
+	return nil
+}
+
+func (v *slowDeployMessage) Name() string {
+	return "apps.SlowDeployMessage"
+}
+
+func SlowDeployMessage() bundle.Mutator {
+	return &slowDeployMessage{}
+}
diff --git a/bundle/apps/upload_config.go b/bundle/apps/upload_config.go
new file mode 100644
index 000000000..5c58c5c6f
--- /dev/null
+++ b/bundle/apps/upload_config.go
@@ -0,0 +1,97 @@
+package apps
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"path"
+	"strings"
+	"sync"
+
+	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/cli/bundle/deploy"
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/filer"
+	"golang.org/x/sync/errgroup"
+
+	"gopkg.in/yaml.v3"
+)
+
+type uploadConfig struct {
+	filerFactory deploy.FilerFactory
+}
+
+func (u *uploadConfig) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+	var diags diag.Diagnostics
+	errGroup, ctx := errgroup.WithContext(ctx)
+
+	mu := sync.Mutex{}
+	for key, app := range b.Config.Resources.Apps {
+		// If the app has a config, we need to deploy it first.
+ // It means we need to write app.yml file with the content of the config field + // to the remote source code path of the app. + if app.Config != nil { + appPath := strings.TrimPrefix(app.SourceCodePath, b.Config.Workspace.FilePath) + + buf, err := configToYaml(app) + if err != nil { + return diag.FromErr(err) + } + + f, err := u.filerFactory(b) + if err != nil { + return diag.FromErr(err) + } + + errGroup.Go(func() error { + err := f.Write(ctx, path.Join(appPath, "app.yml"), buf, filer.OverwriteIfExists) + if err != nil { + mu.Lock() + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Failed to save config", + Detail: fmt.Sprintf("Failed to write %s file: %s", path.Join(app.SourceCodePath, "app.yml"), err), + Locations: b.Config.GetLocations("resources.apps." + key), + }) + mu.Unlock() + } + return nil + }) + } + } + + if err := errGroup.Wait(); err != nil { + return diags.Extend(diag.FromErr(err)) + } + + return diags +} + +// Name implements bundle.Mutator. +func (u *uploadConfig) Name() string { + return "apps:UploadConfig" +} + +func UploadConfig() bundle.Mutator { + return &uploadConfig{ + filerFactory: func(b *bundle.Bundle) (filer.Filer, error) { + return filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.FilePath) + }, + } +} + +func configToYaml(app *resources.App) (*bytes.Buffer, error) { + buf := bytes.NewBuffer(nil) + enc := yaml.NewEncoder(buf) + enc.SetIndent(2) + + err := enc.Encode(app.Config) + defer enc.Close() + + if err != nil { + return nil, fmt.Errorf("failed to encode app config to yaml: %w", err) + } + + return buf, nil +} diff --git a/bundle/apps/upload_config_test.go b/bundle/apps/upload_config_test.go new file mode 100644 index 000000000..a1a6b3afb --- /dev/null +++ b/bundle/apps/upload_config_test.go @@ -0,0 +1,75 @@ +package apps + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + mockfiler "github.com/databricks/cli/internal/mocks/libs/filer" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestAppUploadConfig(t *testing.T) { + root := t.TempDir() + err := os.MkdirAll(filepath.Join(root, "my_app"), 0o700) + require.NoError(t, err) + + b := &bundle.Bundle{ + BundleRootPath: root, + SyncRootPath: root, + SyncRoot: vfs.MustNew(root), + Config: config.Root{ + Workspace: config.Workspace{ + RootPath: "/Workspace/Users/foo@bar.com/", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "my_app": { + App: &apps.App{ + Name: "my_app", + }, + SourceCodePath: "./my_app", + Config: map[string]any{ + "command": []string{"echo", "hello"}, + "env": []map[string]string{ + {"name": "MY_APP", "value": "my value"}, + }, + }, + }, + }, + }, + }, + } + + mockFiler := mockfiler.NewMockFiler(t) + mockFiler.EXPECT().Write(mock.Anything, "my_app/app.yml", bytes.NewBufferString(`command: + - echo + - hello +env: + - name: MY_APP + value: my value +`), filer.OverwriteIfExists).Return(nil) + + u := uploadConfig{ + filerFactory: func(b *bundle.Bundle) (filer.Filer, error) { + return mockFiler, nil + }, + } + + bundletest.SetLocation(b, ".", 
 []dyn.Location{{File: filepath.Join(root, "databricks.yml")}})
+
+ diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), &u))
+ require.NoError(t, diags.Error())
+}
diff --git a/bundle/apps/validate.go b/bundle/apps/validate.go
new file mode 100644
index 000000000..fc50aeafc
--- /dev/null
+++ b/bundle/apps/validate.go
@@ -0,0 +1,53 @@
+package apps
+
+import (
+ "context"
+ "fmt"
+ "path"
+ "strings"
+
+ "github.com/databricks/cli/bundle"
+ "github.com/databricks/cli/libs/diag"
+)
+
+type validate struct{}
+
+func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
+ var diags diag.Diagnostics
+ possibleConfigFiles := []string{"app.yml", "app.yaml"}
+ usedSourceCodePaths := make(map[string]string)
+
+ for key, app := range b.Config.Resources.Apps {
+ if _, ok := usedSourceCodePaths[app.SourceCodePath]; ok {
+ diags = append(diags, diag.Diagnostic{
+ Severity: diag.Error,
+ Summary: "Duplicate app source code path",
+ Detail: fmt.Sprintf("app resource '%s' has the same source code path as app resource '%s', which will lead to the app configurations overriding each other", key, usedSourceCodePaths[app.SourceCodePath]),
+ Locations: b.Config.GetLocations(fmt.Sprintf("resources.apps.%s.source_code_path", key)),
+ })
+ }
+ usedSourceCodePaths[app.SourceCodePath] = key
+
+ for _, configFile := range possibleConfigFiles {
+ appPath := strings.TrimPrefix(app.SourceCodePath, b.Config.Workspace.FilePath)
+ cf := path.Join(appPath, configFile)
+ if _, err := b.SyncRoot.Stat(cf); err == nil {
+ diags = append(diags, diag.Diagnostic{
+ Severity: diag.Error,
+ Summary: configFile + " detected",
+ Detail: fmt.Sprintf("remove %s and use 'config' property for app resource '%s' instead", cf, app.Name),
+ })
+ }
+ }
+ }
+
+ return diags
+}
+
+func (v *validate) Name() string {
+ return "apps.Validate"
+}
+
+func Validate() bundle.Mutator {
+ return &validate{}
+}
diff --git a/bundle/apps/validate_test.go b/bundle/apps/validate_test.go
new file mode 100644
index 000000000..6c3a88191
--- /dev/null
+++ b/bundle/apps/validate_test.go
@@ -0,0 +1,97 @@
+package apps
+
+import (
+ "context"
+ "path/filepath"
+ "testing"
+
+ "github.com/databricks/cli/bundle"
+ "github.com/databricks/cli/bundle/config"
+ "github.com/databricks/cli/bundle/config/mutator"
+ "github.com/databricks/cli/bundle/config/resources"
+ "github.com/databricks/cli/bundle/internal/bundletest"
+ "github.com/databricks/cli/internal/testutil"
+ "github.com/databricks/cli/libs/dyn"
+ "github.com/databricks/cli/libs/vfs"
+ "github.com/databricks/databricks-sdk-go/service/apps"
+ "github.com/stretchr/testify/require"
+)
+
+func TestAppsValidate(t *testing.T) {
+ tmpDir := t.TempDir()
+ testutil.Touch(t, tmpDir, "app1", "app.yml")
+ testutil.Touch(t, tmpDir, "app2", "app.py")
+
+ b := &bundle.Bundle{
+ BundleRootPath: tmpDir,
+ SyncRootPath: tmpDir,
+ SyncRoot: vfs.MustNew(tmpDir),
+ Config: config.Root{
+ Workspace: config.Workspace{
+ FilePath: "/foo/bar/",
+ },
+ Resources: config.Resources{
+ Apps: map[string]*resources.App{
+ "app1": {
+ App: &apps.App{
+ Name: "app1",
+ },
+ SourceCodePath: "./app1",
+ },
+ "app2": {
+ App: &apps.App{
+ Name: "app2",
+ },
+ SourceCodePath: "./app2",
+ },
+ },
+ },
+ },
+ }
+
+ bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}})
+
+ diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate()))
+ require.Len(t, diags, 1)
+ require.Equal(t, "app.yml detected", diags[0].Summary)
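+ // The detail should tell the user to remove the file and use the 'config' property instead.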
+ require.Contains(t, diags[0].Detail, "app.yml and use 'config' property for app resource") +} + +func TestAppsValidateSameSourcePath(t *testing.T) { + tmpDir := t.TempDir() + testutil.Touch(t, tmpDir, "app1", "app.py") + + b := &bundle.Bundle{ + BundleRootPath: tmpDir, + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/foo/bar/", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "app1": { + App: &apps.App{ + Name: "app1", + }, + SourceCodePath: "./app1", + }, + "app2": { + App: &apps.App{ + Name: "app2", + }, + SourceCodePath: "./app1", + }, + }, + }, + }, + } + + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) + + diags := bundle.Apply(context.Background(), b, bundle.Seq(mutator.TranslatePaths(), Validate())) + require.Len(t, diags, 1) + require.Equal(t, "Duplicate app source code path", diags[0].Summary) + require.Contains(t, diags[0].Detail, "has the same source code path as app resource") +} diff --git a/bundle/bundle.go b/bundle/bundle.go index 1f5e2a294..e715b8b2c 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -17,6 +17,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/auth" "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/locker" "github.com/databricks/cli/libs/log" @@ -24,7 +25,6 @@ import ( "github.com/databricks/cli/libs/terraform" "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" - sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/hashicorp/terraform-exec/tfexec" ) @@ -57,6 +57,9 @@ type Bundle struct { // It is loaded from the bundle configuration files and mutators may update it. Config config.Root + // Target stores a snapshot of the Root.Bundle.Target configuration when it was selected by SelectTarget. + Target *config.Target `json:"target_config,omitempty" bundle:"internal"` + // Metadata about the bundle deployment. This is the interface Databricks services // rely on to integrate with bundles when they need additional information about // a bundle deployment. @@ -239,21 +242,5 @@ func (b *Bundle) AuthEnv() (map[string]string, error) { } cfg := b.client.Config - out := make(map[string]string) - for _, attr := range sdkconfig.ConfigAttributes { - // Ignore profile so that downstream tools don't try and reload - // the profile even though we know the current configuration is valid. 
- if attr.Name == "profile" { - continue - } - if len(attr.EnvVars) == 0 { - continue - } - if attr.IsZero(cfg) { - continue - } - out[attr.EnvVars[0]] = attr.GetString(cfg) - } - - return out, nil + return auth.Env(cfg), nil } diff --git a/bundle/config/generate/app.go b/bundle/config/generate/app.go new file mode 100644 index 000000000..1255d63f8 --- /dev/null +++ b/bundle/config/generate/app.go @@ -0,0 +1,37 @@ +package generate + +import ( + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +func ConvertAppToValue(app *apps.App, sourceCodePath string, appConfig map[string]any) (dyn.Value, error) { + ac, err := convert.FromTyped(appConfig, dyn.NilValue) + if err != nil { + return dyn.NilValue, err + } + + ar, err := convert.FromTyped(app.Resources, dyn.NilValue) + if err != nil { + return dyn.NilValue, err + } + + // The majority of fields of the app struct are read-only. + // We copy the relevant fields manually. + dv := map[string]dyn.Value{ + "name": dyn.NewValue(app.Name, []dyn.Location{{Line: 1}}), + "description": dyn.NewValue(app.Description, []dyn.Location{{Line: 2}}), + "source_code_path": dyn.NewValue(sourceCodePath, []dyn.Location{{Line: 3}}), + } + + if ac.Kind() != dyn.KindNil { + dv["config"] = ac.WithLocations([]dyn.Location{{Line: 4}}) + } + + if ar.Kind() != dyn.KindNil { + dv["resources"] = ar.WithLocations([]dyn.Location{{Line: 5}}) + } + + return dyn.V(dv), nil +} diff --git a/bundle/config/loader/process_root_includes.go b/bundle/config/loader/process_root_includes.go index c608a3de6..198095742 100644 --- a/bundle/config/loader/process_root_includes.go +++ b/bundle/config/loader/process_root_includes.go @@ -2,6 +2,7 @@ package loader import ( "context" + "fmt" "path/filepath" "slices" "strings" @@ -36,6 +37,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Maintain list of files in order of files being loaded. // This is stored in the bundle configuration for observability. var files []string + var diags diag.Diagnostics // For each glob, find all files to load. // Ordering of the list of globs is maintained in the output. @@ -60,7 +62,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. // Filter matches to ones we haven't seen yet. var includes []string - for _, match := range matches { + for i, match := range matches { rel, err := filepath.Rel(b.BundleRootPath, match) if err != nil { return diag.FromErr(err) @@ -69,9 +71,22 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag. continue } seen[rel] = true + if filepath.Ext(rel) != ".yaml" && filepath.Ext(rel) != ".yml" { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: "Files in the 'include' configuration section must be YAML files.", + Detail: fmt.Sprintf("The file %s in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead.", rel), + Locations: b.Config.GetLocations(fmt.Sprintf("include[%d]", i)), + }) + continue + } includes = append(includes, rel) } + if len(diags) > 0 { + return diags + } + // Add matches to list of mutators to return. slices.Sort(includes) files = append(files, includes...) 
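For context on the new ConvertAppToValue helper added above in bundle/config/generate/app.go: it builds the dyn.Value representation of an app resource (name, description, source_code_path, plus optional config and resources). The snippet below is a minimal, hypothetical usage sketch; the app value, source code path, and config map are illustrative inputs, and no caller is shown in this part of the diff:

    package main

    import (
    	"fmt"

    	"github.com/databricks/cli/bundle/config/generate"
    	"github.com/databricks/databricks-sdk-go/service/apps"
    )

    func main() {
    	// Illustrative input; in practice the app would come from the Apps API.
    	app := &apps.App{Name: "my_app", Description: "example app"}

    	// Convert the app, a local source code path, and an optional config map
    	// into a dyn.Value that could be written out under resources.apps.my_app.
    	v, err := generate.ConvertAppToValue(app, "./my_app", map[string]any{
    		"command": []string{"python", "app.py"},
    	})
    	if err != nil {
    		panic(err)
    	}

    	fmt.Println(v)
    }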
diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 59b8547be..b402053e7 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -221,6 +221,8 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos dashboard.DisplayName = prefix + dashboard.DisplayName } + // Apps: No presets + return diags } diff --git a/bundle/config/mutator/apply_source_linked_deployment_preset.go b/bundle/config/mutator/apply_source_linked_deployment_preset.go index 78ccc5322..570ca72cf 100644 --- a/bundle/config/mutator/apply_source_linked_deployment_preset.go +++ b/bundle/config/mutator/apply_source_linked_deployment_preset.go @@ -56,17 +56,34 @@ func (m *applySourceLinkedDeploymentPreset) Apply(ctx context.Context, b *bundle b.Config.Presets.SourceLinkedDeployment = &enabled } - if b.Config.Workspace.FilePath != "" && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { - path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("workspace"), dyn.Key("file_path")) + if len(b.Config.Resources.Apps) > 0 && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("presets"), dyn.Key("source_linked_deployment")) + diags = diags.Append( + diag.Diagnostic{ + Severity: diag.Error, + Summary: "source-linked deployment is not supported for apps", + Paths: []dyn.Path{ + path, + }, + Locations: b.Config.GetLocations(path[2:].String()), + }, + ) + return diags + } + + // This mutator runs before workspace paths are defaulted so it's safe to check for the user-defined value + if b.Config.Workspace.FilePath != "" && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { + path := dyn.NewPath(dyn.Key("workspace"), dyn.Key("file_path")) diags = diags.Append( diag.Diagnostic{ Severity: diag.Warning, Summary: "workspace.file_path setting will be ignored in source-linked deployment mode", + Detail: "In source-linked deployment files are not copied to the destination and resources use source files instead", Paths: []dyn.Path{ - path[2:], + path, }, - Locations: b.Config.GetLocations(path[2:].String()), + Locations: b.Config.GetLocations(path.String()), }, ) } diff --git a/bundle/config/mutator/apply_source_linked_deployment_preset_test.go b/bundle/config/mutator/apply_source_linked_deployment_preset_test.go index 1b74fd8e9..42fda8ea7 100644 --- a/bundle/config/mutator/apply_source_linked_deployment_preset_test.go +++ b/bundle/config/mutator/apply_source_linked_deployment_preset_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/dyn" @@ -31,6 +32,7 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { initialValue *bool expectedValue *bool expectedWarning string + expectedError string }{ { name: "preset enabled, bundle in Workspace, databricks runtime", @@ -86,6 +88,18 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { expectedValue: &enabled, expectedWarning: "workspace.file_path setting will be ignored in source-linked deployment mode", }, + { + name: "preset enabled, apps is defined by user", + ctx: dbr.MockRuntime(testContext, true), + mutateBundle: func(b *bundle.Bundle) { + 
b.Config.Resources.Apps = map[string]*resources.App{ + "app": {}, + } + }, + initialValue: &enabled, + expectedValue: &enabled, + expectedError: "source-linked deployment is not supported for apps", + }, } for _, tt := range tests { @@ -107,7 +121,7 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { bundletest.SetLocation(b, "workspace.file_path", []dyn.Location{{File: "databricks.yml"}}) diags := bundle.Apply(tt.ctx, b, mutator.ApplySourceLinkedDeploymentPreset()) - if diags.HasError() { + if diags.HasError() && tt.expectedError == "" { t.Fatalf("unexpected error: %v", diags) } @@ -116,6 +130,11 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { require.NotEmpty(t, diags[0].Locations) } + if tt.expectedError != "" { + require.Equal(t, tt.expectedError, diags[0].Summary) + require.NotEmpty(t, diags[0].Locations) + } + require.Equal(t, tt.expectedValue, b.Config.Presets.SourceLinkedDeployment) }) } diff --git a/bundle/config/mutator/capture_schema_dependency.go b/bundle/config/mutator/capture_schema_dependency.go new file mode 100644 index 000000000..5025c9a0d --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency.go @@ -0,0 +1,100 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" +) + +type captureSchemaDependency struct{} + +// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines +// or UC Volumes using the `${resources.schemas..name}` syntax. Using this +// syntax allows TF to capture the deploy time dependency this DLT pipeline or UC Volume +// has on the schema and deploy changes to the schema before deploying the pipeline or volume. +// +// This mutator translates any implicit schema references in DLT pipelines or UC Volumes +// to the explicit syntax. 
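+//
+// For example, if the bundle defines a schema resource with key "sales" (catalog "main",
+// name "sales_data"), then a volume with catalog_name "main" and schema_name "sales_data"
+// is rewritten to use schema_name "${resources.schemas.sales.name}".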
+func CaptureSchemaDependency() bundle.Mutator { + return &captureSchemaDependency{} +} + +func (m *captureSchemaDependency) Name() string { + return "CaptureSchemaDependency" +} + +func schemaNameRef(key string) string { + return fmt.Sprintf("${resources.schemas.%s.name}", key) +} + +func findSchema(b *bundle.Bundle, catalogName, schemaName string) (string, *resources.Schema) { + if catalogName == "" || schemaName == "" { + return "", nil + } + + for k, s := range b.Config.Resources.Schemas { + if s != nil && s.CreateSchema != nil && s.CatalogName == catalogName && s.Name == schemaName { + return k, s + } + } + return "", nil +} + +func resolveVolume(v *resources.Volume, b *bundle.Bundle) { + if v == nil || v.CreateVolumeRequestContent == nil { + return + } + schemaK, schema := findSchema(b, v.CatalogName, v.SchemaName) + if schema == nil { + return + } + + v.SchemaName = schemaNameRef(schemaK) +} + +func resolvePipelineSchema(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Schema == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Schema) + if schema == nil { + return + } + + p.Schema = schemaNameRef(schemaK) +} + +func resolvePipelineTarget(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Target == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Target) + if schema == nil { + return + } + p.Target = schemaNameRef(schemaK) +} + +func (m *captureSchemaDependency) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, p := range b.Config.Resources.Pipelines { + // "schema" and "target" have the same semantics in the DLT API but are mutually + // exclusive i.e. only one can be set at a time. If schema is set, the pipeline + // is in direct publishing mode and can write tables to multiple schemas + // (vs target which is limited to a single schema). 
+ resolvePipelineTarget(p, b) + resolvePipelineSchema(p, b) + } + for _, v := range b.Config.Resources.Volumes { + resolveVolume(v, b) + } + return nil +} diff --git a/bundle/config/mutator/capture_schema_dependency_test.go b/bundle/config/mutator/capture_schema_dependency_test.go new file mode 100644 index 000000000..0a94e7748 --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency_test.go @@ -0,0 +1,277 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCaptureSchemaDependencyForVolume(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Volumes: map[string]*resources.Volume{ + "volume1": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "foobar", + }, + }, + "volume2": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog2", + SchemaName: "foobar", + }, + }, + "volume3": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "barfoo", + }, + }, + "volume4": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogX", + SchemaName: "foobar", + }, + }, + "volume5": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "schemaX", + }, + }, + "nilVolume": nil, + "emptyVolume": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Volumes["volume1"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Volumes["volume2"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Volumes["volume3"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "foobar", b.Config.Resources.Volumes["volume4"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "schemaX", b.Config.Resources.Volumes["volume5"].CreateVolumeRequestContent.SchemaName) + + assert.Nil(t, b.Config.Resources.Volumes["nilVolume"]) + assert.Nil(t, b.Config.Resources.Volumes["emptyVolume"].CreateVolumeRequestContent) +} + +func TestCaptureSchemaDependencyForPipelinesWithTarget(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + 
}, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Schema: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Schema: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "", + Name: "whatever", + }, + }, + "nilPipeline": nil, + "emptyPipeline": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Schema) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Schema) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Schema) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Schema) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Schema) + + assert.Nil(t, b.Config.Resources.Pipelines["nilPipeline"]) + assert.Nil(t, b.Config.Resources.Pipelines["emptyPipeline"].PipelineSpec) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Target) + } +} + +func TestCaptureSchemaDependencyForPipelinesWithSchema(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Target: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Target: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "", + Name: "whatever", + }, + }, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + assert.Equal(t, "${resources.schemas.schema1.name}", 
b.Config.Resources.Pipelines["pipeline1"].Target) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Target) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Target) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Target) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Target) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Schema) + } +} diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 5c263ac03..3661c6bcd 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -32,7 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn } if info.WorktreeRoot == "" { - b.WorktreeRoot = b.BundleRoot + b.WorktreeRoot = b.SyncRoot } else { b.WorktreeRoot = vfs.MustNew(info.WorktreeRoot) } diff --git a/bundle/config/mutator/merge_apps.go b/bundle/config/mutator/merge_apps.go new file mode 100644 index 000000000..d91e8dd7f --- /dev/null +++ b/bundle/config/mutator/merge_apps.go @@ -0,0 +1,45 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/merge" +) + +type mergeApps struct{} + +func MergeApps() bundle.Mutator { + return &mergeApps{} +} + +func (m *mergeApps) Name() string { + return "MergeApps" +} + +func (m *mergeApps) resourceName(v dyn.Value) string { + switch v.Kind() { + case dyn.KindInvalid, dyn.KindNil: + return "" + case dyn.KindString: + return v.MustString() + default: + panic("app name must be a string") + } +} + +func (m *mergeApps) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + if v.Kind() == dyn.KindNil { + return v, nil + } + + return dyn.Map(v, "resources.apps", dyn.Foreach(func(_ dyn.Path, app dyn.Value) (dyn.Value, error) { + return dyn.Map(app, "resources", merge.ElementsByKeyWithOverride("name", m.resourceName)) + })) + }) + + return diag.FromErr(err) +} diff --git a/bundle/config/mutator/merge_apps_test.go b/bundle/config/mutator/merge_apps_test.go new file mode 100644 index 000000000..0a161b845 --- /dev/null +++ b/bundle/config/mutator/merge_apps_test.go @@ -0,0 +1,73 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" +) + +func TestMergeApps(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "foo": { + App: &apps.App{ + Name: "foo", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + Permission: "CAN_USE", + }, + }, + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", 
+ Permission: "CAN_MANAGE", + }, + }, + { + Name: "sql1", + Job: &apps.AppResourceJob{ + Id: "9876", + Permission: "CAN_MANAGE", + }, + }, + }, + }, + }, + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.MergeApps()) + assert.NoError(t, diags.Error()) + + j := b.Config.Resources.Apps["foo"] + + assert.Len(t, j.Resources, 2) + assert.Equal(t, "job1", j.Resources[0].Name) + assert.Equal(t, "sql1", j.Resources[1].Name) + + assert.Equal(t, "CAN_MANAGE", string(j.Resources[0].Job.Permission)) + + assert.Nil(t, j.Resources[1].SqlWarehouse) + assert.Equal(t, "CAN_MANAGE", string(j.Resources[1].Job.Permission)) +} diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 9f8fd318d..f175566c3 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -197,6 +197,10 @@ func isRunAsSet(r config.Resources) bool { return true } +func isExplicitRootSet(b *bundle.Bundle) bool { + return b.Target != nil && b.Target.Workspace != nil && b.Target.Workspace.RootPath != "" +} + func (m *processTargetMode) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { switch b.Config.Bundle.Mode { case config.Development: diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index d46e64895..9872b4eb0 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/libs/tags" "github.com/databricks/cli/libs/vfs" sdkconfig "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/service/apps" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/dashboards" @@ -142,6 +143,13 @@ func mockBundle(mode config.Mode) *bundle.Bundle { }, }, }, + Apps: map[string]*resources.App{ + "app1": { + App: &apps.App{ + Name: "app1", + }, + }, + }, }, }, SyncRoot: vfs.MustNew("/Users/lennart.kats@databricks.com"), @@ -376,6 +384,23 @@ func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { require.NoError(t, diags.Error()) } +func TestProcessTargetModeProductionOkWithRootPath(t *testing.T) { + b := mockBundle(config.Production) + + // Our target has all kinds of problems when not using service principals ... + diags := validateProductionMode(context.Background(), b, false) + require.Error(t, diags.Error()) + + // ... 
but we're okay if we specify a root path
+ b.Target = &config.Target{
+ Workspace: &config.Workspace{
+ RootPath: "some-root-path",
+ },
+ }
+ diags = validateProductionMode(context.Background(), b, false)
+ require.NoError(t, diags.Error())
+}
+
 // Make sure that we have test coverage for all resource types
 func TestAllResourcesMocked(t *testing.T) {
 b := mockBundle(config.Development)
@@ -417,6 +442,13 @@ func TestAllNonUcResourcesAreRenamed(t *testing.T) {
 for _, key := range field.MapKeys() {
 resource := field.MapIndex(key)
 nameField := resource.Elem().FieldByName("Name")
+ resourceType := resources.Type().Field(i).Name
+
+ // Skip apps, as they are not renamed
+ if resourceType == "Apps" {
+ continue
+ }
+
 if !nameField.IsValid() || nameField.Kind() != reflect.String {
 continue
 }
diff --git a/bundle/config/mutator/python/python_diagnostics.go b/bundle/config/mutator/python/python_diagnostics.go
index 12822065b..7a1e13b4e 100644
--- a/bundle/config/mutator/python/python_diagnostics.go
+++ b/bundle/config/mutator/python/python_diagnostics.go
@@ -9,6 +9,7 @@ import (
 "github.com/databricks/cli/libs/dyn"
 )
 
+// pythonDiagnostic is a single entry in diagnostics.json
 type pythonDiagnostic struct {
 Severity pythonSeverity `json:"severity"`
 Summary string `json:"summary"`
diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go
new file mode 100644
index 000000000..2fa86bea0
--- /dev/null
+++ b/bundle/config/mutator/python/python_locations.go
@@ -0,0 +1,194 @@
+package python
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "path/filepath"
+
+ "github.com/databricks/cli/libs/dyn"
+)
+
+// generatedFileName is used as the virtual file name for YAML generated by Python code.
+//
+// mergePythonLocations replaces dyn.Location values that point to generatedFileName with
+// locations loaded from locations.json
+const generatedFileName = "__generated_by_python__.yml"
+
+// pythonLocations is a data structure for efficient location lookup for a given path
+//
+// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path.
+// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n)
+// where n is the number of components in the path.
+//
+// For example, with locations.json:
+//
+// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
+// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
+// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
+//
+// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5
+//
+// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5,
+// because we use the location of the job as the most precise approximation.
+//
+// See pythonLocationEntry for the structure of a single entry in locations.json
+type pythonLocations struct {
+ // descendants referenced by key, e.g. '.foo'
+ keys map[string]*pythonLocations
+
+ // descendants referenced by index, e.g. 
'[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +// pythonLocationEntry is a single entry in locations.json +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from Python mutator into given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. +func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + newLocation, ok := findPythonLocation(locations, path) + if !ok { + return value, nil + } + + // The first item in the list is the "last" location used for error reporting + // + // Loaded YAML uses virtual file path as location, we remove any of such references, + // because they should use 'newLocation' instead. + // + // We preserve any previous non-virtual locations in case when Python function modified + // resource defined in YAML. + newLocations := append( + []dyn.Location{newLocation}, + removeVirtualLocations(value.Locations())..., + ) + + return value.WithLocations(newLocations), nil + }) +} + +func removeVirtualLocations(locations []dyn.Location) []dyn.Location { + var newLocations []dyn.Location + + for _, location := range locations { + if filepath.Base(location.File) == generatedFileName { + continue + } + + newLocations = append(newLocations, location) + } + + return newLocations +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// locations file is newline-separated JSON objects with pythonLocationEntry structure. 
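+//
+// For example:
+//
+// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
+// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}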
+func parsePythonLocations(input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + currentNode := trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. +func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + currentNode := locations + lastLocation := locations.location + exists := locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 000000000..32afcc92b --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,179 @@ +package python + +import ( + "bytes" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/require" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + 
[]dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestLoadOutput(t *testing.T) { + location := dyn.Location{File: "my_job.py", Line: 1, Column: 1} + bundleRoot := t.TempDir() + output := `{ + "resources": { + "jobs": { + "my_job": { + "name": "my_job", + "tasks": [ + { + "task_key": "my_task", + "notebook_task": { + "notebook_path": "my_notebook" + } + } + ] + } + } + } + }` + + locations := newPythonLocations() + putPythonLocation( + locations, + dyn.MustPathFromString("resources.jobs.my_job"), + location, + ) + + value, diags := loadOutput( + bundleRoot, + bytes.NewReader([]byte(output)), + locations, + ) + + assert.Equal(t, diag.Diagnostics{}, 
diags) + + name, err := dyn.Get(value, "resources.jobs.my_job.name") + require.NoError(t, err) + require.Equal(t, []dyn.Location{location}, name.Locations()) + + // until we implement path normalization, we have to keep locations of values + // that change semantic depending on their location + // + // note: it's important to have absolute path including 'bundleRoot' + // because mutator pipeline already has expanded locations into absolute path + notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path") + require.NoError(t, err) + require.Len(t, notebookPath.Locations(), 1) + require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File) +} + +func TestParsePythonLocations(t *testing.T) { + expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 8009ab243..cd2e286e5 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,11 +7,14 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" "reflect" "strings" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -124,6 +127,15 @@ type opts struct { enabled bool venvPath string + + loadLocations bool +} + +type runPythonMutatorOpts struct { + cacheDir string + bundleRootPath string + pythonPath string + loadLocations bool } // getOpts adapts deprecated PyDABs and upcoming Python configuration @@ -148,8 +160,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) { // don't execute for phases for 'python' section if phase == PythonMutatorPhaseInit || phase == PythonMutatorPhaseLoad { return opts{ - enabled: true, - venvPath: experimental.PyDABs.VEnvPath, + enabled: true, + venvPath: experimental.PyDABs.VEnvPath, + loadLocations: false, // not supported in PyDABs }, nil } else { return opts{}, nil @@ -158,8 +171,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) { // don't execute for phases for 'pydabs' section if phase == PythonMutatorPhaseLoadResources || phase == PythonMutatorPhaseApplyMutators { return opts{ - enabled: true, - venvPath: experimental.Python.VEnvPath, + enabled: true, + venvPath: experimental.Python.VEnvPath, + loadLocations: true, }, nil } else { return opts{}, nil @@ -194,7 +208,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + bundleRootPath: b.BundleRootPath, + pythonPath: pythonPath, + loadLocations: opts.loadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -238,13 +257,14 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-python") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, 
diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -257,6 +277,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -271,7 +295,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.bundleRootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -307,7 +331,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -351,7 +380,21 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0o600) } -func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. +func loadLocationsFile(locationsPath string) (*pythonLocations, error) { + locationsFile, err := os.Open(locationsPath) + if errors.Is(err, fs.ErrNotExist) { + return newPythonLocations(), nil + } else if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(locationsFile) +} + +func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -359,15 +402,19 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { defer outputFile.Close() + return loadOutput(rootPath, outputFile, locations) +} + +func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. 
// - // virtualPath has to stay in rootPath, because locations outside root path are not allowed: + // virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed: // // Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_python__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -377,7 +424,29 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + // + // we can remove this once we: + // - add variable interpolation before and after PythonMutator + // - implement path normalization (aka path normal form) + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index d51572c8a..322fb79e8 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -7,7 +7,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -93,6 +92,8 @@ func TestPythonMutator_loadResources(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) mutator := PythonMutator(PythonMutatorPhaseLoadResources) @@ -110,6 +111,25 @@ func TestPythonMutator_loadResources(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: "src/examples/job1.py", + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, 
[]dyn.Location{ @@ -157,7 +177,7 @@ func TestPythonMutator_loadResources_disallowed(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) @@ -202,7 +222,7 @@ func TestPythonMutator_applyMutators(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseApplyMutators) diag := bundle.Apply(ctx, b, mutator) @@ -224,7 +244,7 @@ func TestPythonMutator_applyMutators(t *testing.T) { description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description")) require.NoError(t, err) - expectedVirtualPath, err := filepath.Abs("__generated_by_python__.yml") + expectedVirtualPath, err := filepath.Abs(generatedFileName) require.NoError(t, err) assert.Equal(t, expectedVirtualPath, description.Location().File) @@ -263,7 +283,7 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoadResources) diag := bundle.Apply(ctx, b, mutator) @@ -312,7 +332,7 @@ func TestGetOps_Python(t *testing.T) { }, PythonMutatorPhaseLoadResources) assert.NoError(t, err) - assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: true}, actual) } func TestGetOps_PyDABs(t *testing.T) { @@ -328,7 +348,7 @@ func TestGetOps_PyDABs(t *testing.T) { }, PythonMutatorPhaseInit) assert.NoError(t, err) - assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual) + assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: false}, actual) } func TestGetOps_empty(t *testing.T) { @@ -661,7 +681,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output, diagnostics, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -673,32 +693,51 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0o600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0o600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, 
[]byte(output), 0o600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := range args { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content)) diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 7ad3dfd8d..9aa93791f 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -3,6 +3,7 @@ package mutator import ( "context" "errors" + "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -13,15 +14,37 @@ import ( "github.com/databricks/cli/libs/dyn/dynvar" ) +/* +For pathological cases, output and time grow exponentially. + +On my laptop, timings for acceptance/bundle/variables/complex-cycle: +rounds time + + 9 0.10s + 10 0.13s + 11 0.27s + 12 0.68s + 13 1.98s + 14 6.28s + 15 21.70s + 16 78.16s +*/ +const maxResolutionRounds = 11 + type resolveVariableReferences struct { - prefixes []string - pattern dyn.Pattern - lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) - skipFn func(dyn.Value) bool + prefixes []string + pattern dyn.Pattern + lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) + skipFn func(dyn.Value) bool + extraRounds int } func ResolveVariableReferences(prefixes ...string) bundle.Mutator { - return &resolveVariableReferences{prefixes: prefixes, lookupFn: lookup} + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + } } func ResolveVariableReferencesInLookup() bundle.Mutator { @@ -32,19 +55,6 @@ func ResolveVariableReferencesInLookup() bundle.Mutator { }, pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("lookup")), lookupFn: lookupForVariables} } -func ResolveVariableReferencesInComplexVariables() bundle.Mutator { - return &resolveVariableReferences{ - prefixes: []string{ - "bundle", - "workspace", - "variables", - }, - pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("value")), - lookupFn: lookupForComplexVariables, - skipFn: skipResolvingInNonComplexVariables, - } -} - func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { if path.String() == "workspace.file_path" { @@ -57,38 +67,6 @@ func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { return dyn.GetByPath(v, path) } -func lookupForComplexVariables(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { - if path[0].Key() != "variables" { - return lookup(v, path, b) - } - - varV, err := dyn.GetByPath(v, path[:len(path)-1]) - if err != nil { - return dyn.InvalidValue, err - } - - var vv variable.Variable - err = convert.ToTyped(&vv, varV) - if err != nil { - return dyn.InvalidValue, err - } - - if vv.Type == variable.VariableTypeComplex { - return dyn.InvalidValue, errors.New("complex variables cannot contain references to another complex variables") - } - - return lookup(v, path, b) -} - -func skipResolvingInNonComplexVariables(v dyn.Value) bool { - switch v.Kind() { - case dyn.KindMap, dyn.KindSequence: - return false - default: - return true - } -} - func lookupForVariables(v dyn.Value, path dyn.Path, b 
*bundle.Bundle) (dyn.Value, error) { if path[0].Key() != "variables" { return lookup(v, path, b) @@ -131,7 +109,36 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) varPath := dyn.NewPath(dyn.Key("var")) var diags diag.Diagnostics + maxRounds := 1 + m.extraRounds + for round := range maxRounds { + hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + + diags = diags.Extend(newDiags) + + if diags.HasError() { + break + } + + if !hasUpdates { + break + } + + if round >= maxRounds-1 { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("Detected unresolved variables after %d resolution rounds", round+1), + // Would be nice to include names of the variables there, but that would complicate things more + }) + break + } + } + return diags +} + +func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn.Path, varPath dyn.Path) (bool, diag.Diagnostics) { + var diags diag.Diagnostics + hasUpdates := false err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -174,6 +181,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if m.skipFn != nil && m.skipFn(v) { return dyn.InvalidValue, dynvar.ErrSkipResolution } + hasUpdates = true return m.lookupFn(normalized, path, b) } } @@ -194,5 +202,6 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if err != nil { diags = diags.Extend(diag.FromErr(err)) } - return diags + + return hasUpdates, diags } diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 18bb022aa..44f6c8dbb 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -7,321 +7,10 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/config/variable" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestResolveVariableReferencesToBundleVariables(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Workspace: config.Workspace{ - RootPath: "${bundle.name}/${var.foo}", - }, - Variables: map[string]*variable.Variable{ - "foo": { - Value: "bar", - }, - }, - }, - } - - // Apply with a valid prefix. This should change the workspace root path. 
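The maxResolutionRounds cap introduced above guards against pathological configurations in which every substitution round doubles the intermediate value. The following standalone Go sketch (not CLI code; the variable chain and the reference regex are invented for illustration) reproduces that doubling with a plain string substitution loop:

package main

import (
	"fmt"
	"regexp"
)

// Each round substitutes only the references present at the start of the round;
// references introduced by a substitution wait for the next round, mirroring
// resolveOnce above. With var.vK defined as two copies of var.v(K-1), the value
// doubles every round, which is why a hard cap on rounds is needed.
func main() {
	const depth = 12
	vars := map[string]string{"v0": "x"}
	for i := 1; i <= depth; i++ {
		ref := fmt.Sprintf("${var.v%d} ${var.v%d}", i-1, i-1)
		vars[fmt.Sprintf("v%d", i)] = ref
	}

	re := regexp.MustCompile(`\$\{var\.([A-Za-z0-9_]+)\}`)
	value := fmt.Sprintf("${var.v%d}", depth)
	for round := 1; ; round++ {
		next := re.ReplaceAllStringFunc(value, func(m string) string {
			name := re.FindStringSubmatch(m)[1]
			if v, ok := vars[name]; ok {
				return v
			}
			return m
		})
		if next == value {
			fmt.Printf("fixed point after %d rounds, value is %d bytes\n", round-1, len(value))
			return
		}
		value = next
	}
}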
- diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "example/bar", b.Config.Workspace.RootPath) -} - -func TestResolveVariableReferencesForPrimitiveNonStringFields(t *testing.T) { - var diags diag.Diagnostics - - b := &bundle.Bundle{ - Config: config.Root{ - Variables: map[string]*variable.Variable{ - "no_alert_for_canceled_runs": {}, - "no_alert_for_skipped_runs": {}, - "min_workers": {}, - "max_workers": {}, - "spot_bid_max_price": {}, - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - NotificationSettings: &jobs.JobNotificationSettings{ - NoAlertForCanceledRuns: false, - NoAlertForSkippedRuns: false, - }, - Tasks: []jobs.Task{ - { - NewCluster: &compute.ClusterSpec{ - Autoscale: &compute.AutoScale{ - MinWorkers: 0, - MaxWorkers: 0, - }, - AzureAttributes: &compute.AzureAttributes{ - SpotBidMaxPrice: 0.0, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Initialize the variables. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.InitializeVariables([]string{ - "no_alert_for_canceled_runs=true", - "no_alert_for_skipped_runs=true", - "min_workers=1", - "max_workers=2", - "spot_bid_max_price=0.5", - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Assign the variables to the dynamic configuration. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - // Set the notification settings. - p = dyn.MustPathFromString("resources.jobs.job1.notification_settings") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_canceled_runs")), dyn.V("${var.no_alert_for_canceled_runs}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_skipped_runs")), dyn.V("${var.no_alert_for_skipped_runs}")) - require.NoError(t, err) - - // Set the min and max workers. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.autoscale") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("min_workers")), dyn.V("${var.min_workers}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("max_workers")), dyn.V("${var.max_workers}")) - require.NoError(t, err) - - // Set the spot bid max price. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.azure_attributes") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("spot_bid_max_price")), dyn.V("${var.spot_bid_max_price}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Apply for the variable prefix. This should resolve the variables to their values. 
- diags = bundle.Apply(context.Background(), b, ResolveVariableReferences("variables")) - require.NoError(t, diags.Error()) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForCanceledRuns) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForSkippedRuns) - assert.Equal(t, 1, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MinWorkers) - assert.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MaxWorkers) - assert.InDelta(t, 0.5, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.AzureAttributes.SpotBidMaxPrice, 0.0001) -} - -func TestResolveComplexVariable(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, ResolveVariableReferences("bundle", "workspace", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NumWorkers) -} - -func TestResolveComplexVariableReferencesWithComplexVariablesError(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - "spark_conf": "${var.spark_conf}", - }, - Type: variable.VariableTypeComplex, - }, - "spark_conf": { - Value: map[string]any{ - "spark.executor.memory": "4g", - "spark.executor.cores": "2", - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. 
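The deleted tests above exercised complex variables that reference other variables; with the round-based resolver such nested references can now be picked up on a later pass, which is why the dedicated complex-variable mutator is removed. A toy sketch of that behavior, assuming illustrative variable names and a simplified value model (plain maps and strings rather than dyn.Value):

package main

import (
	"fmt"
	"strings"
)

// resolveRound walks a value once and substitutes any string that is exactly a
// "${var.NAME}" reference. References that become visible only after a
// substitution are handled by the next call, like the rounds in Apply above.
func resolveRound(vars map[string]any, v any) any {
	switch t := v.(type) {
	case string:
		if name, ok := strings.CutPrefix(t, "${var."); ok && strings.HasSuffix(name, "}") {
			if repl, ok := vars[strings.TrimSuffix(name, "}")]; ok {
				return repl
			}
		}
		return t
	case map[string]any:
		out := make(map[string]any, len(t))
		for k, val := range t {
			out[k] = resolveRound(vars, val)
		}
		return out
	default:
		return t
	}
}

func main() {
	vars := map[string]any{
		"spark_conf": map[string]any{"spark.executor.memory": "4g"},
		"cluster": map[string]any{
			"node_type_id": "Standard_DS3_v2",
			"spark_conf":   "${var.spark_conf}",
		},
	}

	value := any("${var.cluster}")
	value = resolveRound(vars, value) // round 1: cluster is inlined, a spark_conf reference appears
	value = resolveRound(vars, value) // round 2: the nested spark_conf reference resolves
	fmt.Println(value)
}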
- diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq(ResolveVariableReferencesInComplexVariables(), ResolveVariableReferences("bundle", "workspace", "variables"))) - require.ErrorContains(t, diags.Error(), "complex variables cannot contain references to another complex variables") -} - -func TestResolveComplexVariableWithVarReference(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "package_version": { - Value: "1.0.0", - }, - "cluster_libraries": { - Value: [](map[string]any){ - { - "pypi": map[string]string{ - "package": "cicd_template==${var.package_version}", - }, - }, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{}, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("libraries")), dyn.V("${var.cluster_libraries}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq( - ResolveVariableReferencesInComplexVariables(), - ResolveVariableReferences("bundle", "workspace", "variables"), - )) - require.NoError(t, diags.Error()) - require.Equal(t, "cicd_template==1.0.0", b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[0].Pypi.Package) -} - func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCases := []struct { enabled bool diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go index 7ffd782c2..3d7391b01 100644 --- a/bundle/config/mutator/run_as.go +++ b/bundle/config/mutator/run_as.go @@ -119,6 +119,16 @@ func validateRunAs(b *bundle.Bundle) diag.Diagnostics { )) } + // Apps do not support run_as in the API. + if len(b.Config.Resources.Apps) > 0 { + diags = diags.Extend(reportRunAsNotSupported( + "apps", + b.Config.GetLocation("resources.apps"), + b.Config.Workspace.CurrentUser.UserName, + identity, + )) + } + return diags } diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index dbf4bf806..650b65d61 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -32,6 +32,7 @@ func allResourceTypes(t *testing.T) []string { // the dyn library gives us the correct list of all resources supported. 
Please // also update this check when adding a new resource require.Equal(t, []string{ + "apps", "clusters", "dashboards", "experiments", @@ -104,47 +105,47 @@ func TestRunAsWorksForAllowedResources(t *testing.T) { } } -func TestRunAsErrorForUnsupportedResources(t *testing.T) { - // Bundle "run_as" has two modes of operation, each with a different set of - // resources that are supported. - // Cases: - // 1. When the bundle "run_as" identity is same as the current deployment - // identity. In this case all resources are supported. - // 2. When the bundle "run_as" identity is different from the current - // deployment identity. In this case only a subset of resources are - // supported. This subset of resources are defined in the allow list below. - // - // To be a part of the allow list, the resource must satisfy one of the following - // two conditions: - // 1. The resource supports setting a run_as identity to a different user - // from the owner/creator of the resource. For example, jobs. - // 2. Run as semantics do not apply to the resource. We do not plan to add - // platform side support for `run_as` for these resources. For example, - // experiments or registered models. - // - // Any resource that is not on the allow list cannot be used when the bundle - // run_as is different from the current deployment user. "bundle validate" must - // return an error if such a resource has been defined, and the run_as identity - // is different from the current deployment identity. - // - // Action Item: If you are adding a new resource to DABs, please check in with - // the relevant owning team whether the resource should be on the allow list or (implicitly) on - // the deny list. Any resources that could have run_as semantics in the future - // should be on the deny list. - // For example: Teams for pipelines, model serving endpoints or Lakeview dashboards - // are planning to add platform side support for `run_as` for these resources at - // some point in the future. These resources are (implicitly) on the deny list, since - // they are not on the allow list below. - allowList := []string{ - "clusters", - "jobs", - "models", - "registered_models", - "experiments", - "schemas", - "volumes", - } +// Bundle "run_as" has two modes of operation, each with a different set of +// resources that are supported. +// Cases: +// 1. When the bundle "run_as" identity is same as the current deployment +// identity. In this case all resources are supported. +// 2. When the bundle "run_as" identity is different from the current +// deployment identity. In this case only a subset of resources are +// supported. This subset of resources are defined in the allow list below. +// +// To be a part of the allow list, the resource must satisfy one of the following +// two conditions: +// 1. The resource supports setting a run_as identity to a different user +// from the owner/creator of the resource. For example, jobs. +// 2. Run as semantics do not apply to the resource. We do not plan to add +// platform side support for `run_as` for these resources. For example, +// experiments or registered models. +// +// Any resource that is not on the allow list cannot be used when the bundle +// run_as is different from the current deployment user. "bundle validate" must +// return an error if such a resource has been defined, and the run_as identity +// is different from the current deployment identity. 
+// +// Action Item: If you are adding a new resource to DABs, please check in with +// the relevant owning team whether the resource should be on the allow list or (implicitly) on +// the deny list. Any resources that could have run_as semantics in the future +// should be on the deny list. +// For example: Teams for pipelines, model serving endpoints or Lakeview dashboards +// are planning to add platform side support for `run_as` for these resources at +// some point in the future. These resources are (implicitly) on the deny list, since +// they are not on the allow list below. +var allowList = []string{ + "clusters", + "jobs", + "models", + "registered_models", + "experiments", + "schemas", + "volumes", +} +func TestRunAsErrorForUnsupportedResources(t *testing.T) { base := config.Root{ Workspace: config.Workspace{ CurrentUser: &config.User{ @@ -197,3 +198,54 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { "See https://docs.databricks.com/dev-tools/bundles/run-as.html to learn more about the run_as property.", rt) } } + +func TestRunAsNoErrorForSupportedResources(t *testing.T) { + base := config.Root{ + Workspace: config.Workspace{ + CurrentUser: &config.User{ + User: &iam.User{ + UserName: "alice", + }, + }, + }, + RunAs: &jobs.JobRunAs{ + UserName: "bob", + }, + } + + v, err := convert.FromTyped(base, dyn.NilValue) + require.NoError(t, err) + + // Define top level resources key in the bundle configuration. + // This is not part of the typed configuration, so we need to add it manually. + v, err = dyn.Set(v, "resources", dyn.V(map[string]dyn.Value{})) + require.NoError(t, err) + + for _, rt := range allResourceTypes(t) { + // Skip unsupported resources + if !slices.Contains(allowList, rt) { + continue + } + + // Add an instance of the resource type that is not on the allow list to + // the bundle configuration. + nv, err := dyn.SetByPath(v, dyn.NewPath(dyn.Key("resources"), dyn.Key(rt)), dyn.V(map[string]dyn.Value{ + "foo": dyn.V(map[string]dyn.Value{ + "name": dyn.V("bar"), + }), + })) + require.NoError(t, err) + + // Get back typed configuration from the newly created invalid bundle configuration. + r := &config.Root{} + err = convert.ToTyped(r, nv) + require.NoError(t, err) + + // Assert this configuration passes validation. + b := &bundle.Bundle{ + Config: *r, + } + diags := bundle.Apply(context.Background(), b, SetRunAs()) + require.NoError(t, diags.Error()) + } +} diff --git a/bundle/config/mutator/select_target.go b/bundle/config/mutator/select_target.go index 178686b6e..ce18da4f5 100644 --- a/bundle/config/mutator/select_target.go +++ b/bundle/config/mutator/select_target.go @@ -15,6 +15,7 @@ type selectTarget struct { } // SelectTarget merges the specified target into the root configuration. +// After merging, it removes the 'Targets' section from the configuration. func SelectTarget(name string) bundle.Mutator { return &selectTarget{ name: name, @@ -31,7 +32,7 @@ func (m *selectTarget) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnosti } // Get specified target - _, ok := b.Config.Targets[m.name] + target, ok := b.Config.Targets[m.name] if !ok { return diag.Errorf("%s: no such target. Available targets: %s", m.name, strings.Join(maps.Keys(b.Config.Targets), ", ")) } @@ -43,13 +44,15 @@ func (m *selectTarget) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnosti } // Store specified target in configuration for reference. + b.Target = target b.Config.Bundle.Target = m.name // We do this for backward compatibility. 
// TODO: remove when Environments section is not supported anymore. b.Config.Bundle.Environment = b.Config.Bundle.Target - // Clear targets after loading. + // Cleanup the original targets and environments sections since they + // show up in the JSON output of the 'summary' and 'validate' commands. b.Config.Targets = nil b.Config.Environments = nil diff --git a/bundle/config/mutator/set_variables.go b/bundle/config/mutator/set_variables.go index 9e9f2dcfe..ac2f660a9 100644 --- a/bundle/config/mutator/set_variables.go +++ b/bundle/config/mutator/set_variables.go @@ -3,11 +3,14 @@ package mutator import ( "context" "fmt" + "os" + "path/filepath" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/jsonloader" "github.com/databricks/cli/libs/env" ) @@ -23,7 +26,11 @@ func (m *setVariables) Name() string { return "SetVariables" } -func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string) (dyn.Value, error) { +func getDefaultVariableFilePath(target string) string { + return ".databricks/bundle/" + target + "/variable-overrides.json" +} + +func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, error) { // case: variable already has value initialized, so skip if variable.HasValue() { return v, nil @@ -49,6 +56,26 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, return v, nil } + // case: Set the variable to the default value from the variable file + if fileDefault.Kind() != dyn.KindInvalid && fileDefault.Kind() != dyn.KindNil { + hasComplexType := variable.IsComplex() + hasComplexValue := fileDefault.Kind() == dyn.KindMap || fileDefault.Kind() == dyn.KindSequence + + if hasComplexType && !hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name) + } + if !hasComplexType && hasComplexValue { + return dyn.InvalidValue, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name) + } + + v, err := dyn.Set(v, "value", fileDefault) + if err != nil { + return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err) + } + + return v, nil + } + // case: Set the variable to its default value if variable.HasDefault() { vDefault, err := dyn.Get(v, "default") @@ -64,10 +91,43 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, } // We should have had a value to set for the variable at this point. - return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Assignment can be done through the "--var" flag or by setting the %s environment variable`, name, bundleVarPrefix+name) + return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. 
Assignment can be done using "--var", by setting the %s environment variable, or in %s file`, name, bundleVarPrefix+name, getDefaultVariableFilePath("")) +} + +func readVariablesFromFile(b *bundle.Bundle) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + + filePath := filepath.Join(b.BundleRootPath, getDefaultVariableFilePath(b.Config.Bundle.Target)) + if _, err := os.Stat(filePath); err != nil { + return dyn.InvalidValue, nil + } + + f, err := os.ReadFile(filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to read variables file: %w", err)) + } + + val, err := jsonloader.LoadJSON(f, filePath) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse variables file %s: %w", filePath, err)) + } + + if val.Kind() != dyn.KindMap { + return dyn.InvalidValue, diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("failed to parse variables file %s: invalid format", filePath), + Detail: "Variables file must be a JSON object with the following format:\n{\"var1\": \"value1\", \"var2\": \"value2\"}", + }) + } + + return val, nil } func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + defaults, diags := readVariablesFromFile(b) + if diags.HasError() { + return diags + } err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "variables", dyn.Foreach(func(p dyn.Path, variable dyn.Value) (dyn.Value, error) { name := p[1].Key() @@ -76,9 +136,10 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos return dyn.InvalidValue, fmt.Errorf(`variable "%s" is not defined`, name) } - return setVariable(ctx, variable, v, name) + fileDefault, _ := dyn.Get(defaults, name) + return setVariable(ctx, variable, v, name, fileDefault) })) }) - return diag.FromErr(err) + return diags.Extend(diag.FromErr(err)) } diff --git a/bundle/config/mutator/set_variables_test.go b/bundle/config/mutator/set_variables_test.go index 07a5c8214..d904d5be3 100644 --- a/bundle/config/mutator/set_variables_test.go +++ b/bundle/config/mutator/set_variables_test.go @@ -25,7 +25,7 @@ func TestSetVariableFromProcessEnvVar(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -43,7 +43,7 @@ func TestSetVariableUsingDefaultValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -65,7 +65,7 @@ func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = convert.ToTyped(&variable, v) @@ -90,7 +90,7 @@ func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - v, err = setVariable(context.Background(), v, &variable, "foo") + v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) require.NoError(t, err) err = 
convert.ToTyped(&variable, v) @@ -107,8 +107,8 @@ func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") - assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_foo environment variable") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) + assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done using \"--var\", by setting the BUNDLE_VAR_foo environment variable, or in .databricks/bundle//variable-overrides.json file") } func TestSetVariablesMutator(t *testing.T) { @@ -157,6 +157,6 @@ func TestSetComplexVariablesViaEnvVariablesIsNotAllowed(t *testing.T) { v, err := convert.FromTyped(variable, dyn.NilValue) require.NoError(t, err) - _, err = setVariable(context.Background(), v, &variable, "foo") + _, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue) assert.ErrorContains(t, err, "setting via environment variables (BUNDLE_VAR_foo) is not supported for complex variable foo") } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index af0f94120..1eda578fa 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -6,6 +6,7 @@ import ( "fmt" "io/fs" "net/url" + "os" "path" "path/filepath" "strings" @@ -17,6 +18,47 @@ import ( "github.com/databricks/cli/libs/notebook" ) +// TranslateMode specifies how a path should be translated. +type TranslateMode int + +const ( + // TranslateModeNotebook translates a path to a remote notebook. + TranslateModeNotebook TranslateMode = iota + + // TranslateModeFile translates a path to a remote regular file. + TranslateModeFile + + // TranslateModeDirectory translates a path to a remote directory. + TranslateModeDirectory + + // TranslateModeLocalAbsoluteFile translates a path to the local absolute file path. + // It returns an error if the path does not exist or is a directory. + TranslateModeLocalAbsoluteFile + + // TranslateModeLocalAbsoluteDirectory translates a path to the local absolute directory path. + // It returns an error if the path does not exist or is not a directory. + TranslateModeLocalAbsoluteDirectory + + // TranslateModeLocalRelative translates a path to be relative to the bundle sync root path. + // It does not check if the path exists, nor care if it is a file or directory. + TranslateModeLocalRelative + + // TranslateModeLocalRelativeWithPrefix translates a path to be relative to the bundle sync root path. + // It a "./" prefix to the path if it does not already have one. + // This allows for disambiguating between paths and PyPI package names. + TranslateModeLocalRelativeWithPrefix +) + +// translateOptions control path translation behavior. +type translateOptions struct { + // Mode specifies how the path should be translated. + Mode TranslateMode + + // AllowPathOutsideSyncRoot can be set for paths that are not tied to the sync root path. + // This is the case for artifact paths, for example. 
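The setVariable changes above add a per-target override file as another value source. A minimal sketch of the file location and shape consumed by readVariablesFromFile (the variable names and values are invented; the path segment after bundle/ is the target name):

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	bundleRoot := "." // stands in for b.BundleRootPath
	target := "dev"

	// The file must be a flat JSON object keyed by variable name. Simple
	// variables take scalar values, complex variables take objects or arrays.
	overrides := map[string]any{
		"warehouse_id": "abc123",
		"cluster": map[string]any{
			"node_type_id": "Standard_DS3_v2",
			"num_workers":  2,
		},
	}

	path := filepath.Join(bundleRoot, ".databricks", "bundle", target, "variable-overrides.json")
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		panic(err)
	}

	data, err := json.MarshalIndent(overrides, "", "  ")
	if err != nil {
		panic(err)
	}
	if err := os.WriteFile(path, data, 0o644); err != nil {
		panic(err)
	}

	fmt.Println("wrote", path)
}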
+ AllowPathOutsideSyncRoot bool +} + type ErrIsNotebook struct { path string } @@ -44,8 +86,6 @@ func (m *translatePaths) Name() string { return "TranslatePaths" } -type rewriteFunc func(literal, localFullPath, localRelPath, remotePath string) (string, error) - // translateContext is a context for rewriting paths in a config. // It is freshly instantiated on every mutator apply call. // It provides access to the underlying bundle object such that @@ -56,77 +96,97 @@ type translateContext struct { // seen is a map of local paths to their corresponding remote paths. // If a local path has already been successfully resolved, we do not need to resolve it again. seen map[string]string + + // remoteRoot is the root path of the remote workspace. + // It is equal to ${workspace.file_path} for regular deployments. + // It points to the source root path for source-linked deployments. + remoteRoot string } // rewritePath converts a given relative path from the loaded config to a new path based on the passed rewriting function // // It takes these arguments: -// - The argument `dir` is the directory relative to which the given relative path is. -// - The given relative path is both passed and written back through `*p`. -// - The argument `fn` is a function that performs the actual rewriting logic. -// This logic is different between regular files or notebooks. +// - The context in which the function is called. +// - The argument `dir` is the directory relative to which the relative path should be interpreted. +// - The argument `input` is the relative path to rewrite. +// - The argument `opts` is a struct that specifies how the path should be rewritten. +// It contains a `Mode` field that specifies how the path should be rewritten. // -// The function returns an error if it is impossible to rewrite the given relative path. +// The function returns the rewritten path if successful, or an error if the path could not be rewritten. +// The returned path is an empty string if the path was not rewritten. func (t *translateContext) rewritePath( + ctx context.Context, dir string, - p *string, - fn rewriteFunc, -) error { + input string, + opts translateOptions, +) (string, error) { // We assume absolute paths point to a location in the workspace - if path.IsAbs(*p) { - return nil + if path.IsAbs(input) { + return "", nil } - url, err := url.Parse(*p) + url, err := url.Parse(input) if err != nil { - return err + return "", err } // If the file path has scheme, it's a full path and we don't need to transform it if url.Scheme != "" { - return nil + return "", nil } // Local path is relative to the directory the resource was defined in. - localPath := filepath.Join(dir, filepath.FromSlash(*p)) + localPath := filepath.Join(dir, input) if interp, ok := t.seen[localPath]; ok { - *p = interp - return nil + return interp, nil } // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. 
localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { - return err + return "", err } - if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in sync root path", localPath) + if !opts.AllowPathOutsideSyncRoot && !filepath.IsLocal(localRelPath) { + return "", fmt.Errorf("path %s is not contained in sync root path", localPath) } - var workspacePath string - if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { - workspacePath = t.b.SyncRootPath - } else { - workspacePath = t.b.Config.Workspace.FilePath - } - remotePath := path.Join(workspacePath, filepath.ToSlash(localRelPath)) + // Normalize paths to separated by forward slashes. + localPath = filepath.ToSlash(localPath) + localRelPath = filepath.ToSlash(localRelPath) // Convert local path into workspace path via specified function. - interp, err := fn(*p, localPath, localRelPath, remotePath) + var interp string + switch opts.Mode { + case TranslateModeNotebook: + interp, err = t.translateNotebookPath(ctx, input, localPath, localRelPath) + case TranslateModeFile: + interp, err = t.translateFilePath(ctx, input, localPath, localRelPath) + case TranslateModeDirectory: + interp, err = t.translateDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteFile: + interp, err = t.translateLocalAbsoluteFilePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalAbsoluteDirectory: + interp, err = t.translateLocalAbsoluteDirectoryPath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelative: + interp, err = t.translateLocalRelativePath(ctx, input, localPath, localRelPath) + case TranslateModeLocalRelativeWithPrefix: + interp, err = t.translateLocalRelativeWithPrefixPath(ctx, input, localPath, localRelPath) + default: + return "", fmt.Errorf("unsupported translate mode: %d", opts.Mode) + } if err != nil { - return err + return "", err } - *p = interp t.seen[localPath] = interp - return nil + return interp, nil } -func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { - if filepath.Ext(localFullPath) != notebook.ExtensionNone { + if path.Ext(localFullPath) != notebook.ExtensionNone { return "", fmt.Errorf("notebook %s not found", literal) } @@ -142,7 +202,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe // way we can provide a more targeted error message. for _, ext := range extensions { literalWithExt := literal + ext - localRelPathWithExt := filepath.ToSlash(localRelPath + ext) + localRelPathWithExt := localRelPath + ext if _, err := fs.Stat(t.b.SyncRoot, localRelPathWithExt); err == nil { return "", fmt.Errorf(`notebook %s not found. Did you mean %s? Local notebook references are expected to contain one of the following @@ -162,45 +222,42 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex } // Upon import, notebooks are stripped of their extension. 
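The containment check above replaces the old strings.HasPrefix(localRelPath, "..") test with filepath.IsLocal, which also rejects rooted paths and paths that escape the root only after cleaning. A small standalone illustration (Go 1.20+; the sample paths are arbitrary):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	samples := []string{
		"src/app.py",        // stays inside the root: local
		"sub/../task.whl",   // cleans to "task.whl": local
		"../outside.py",     // escapes the root: not local
		"/abs/elsewhere.py", // rooted path: not local
	}
	for _, p := range samples {
		fmt.Printf("%-20s IsLocal=%v\n", p, filepath.IsLocal(p))
	}
}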
- return strings.TrimSuffix(remotePath, filepath.Ext(localFullPath)), nil + localRelPathNoExt := strings.TrimSuffix(localRelPath, path.Ext(localRelPath)) + return path.Join(t.remoteRoot, localRelPathNoExt), nil } -func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) +func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", filepath.FromSlash(localFullPath), err) } if nb { return "", ErrIsNotebook{localFullPath} } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if err != nil { return "", err } if !info.IsDir() { - return "", fmt.Errorf("%s is not a directory", localFullPath) + return "", fmt.Errorf("%s is not a directory", filepath.FromSlash(localFullPath)) } - return remotePath, nil + return path.Join(t.remoteRoot, localRelPath), nil } -func (t *translateContext) translateNoOp(literal, localFullPath, localRelPath, remotePath string) (string, error) { - return localRelPath, nil -} - -func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) +func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { + info, err := t.b.SyncRoot.Stat(localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a file: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a file: %w", filepath.FromSlash(localFullPath), err) } if info.IsDir() { return "", fmt.Errorf("expected %s to be a file but found a directory", literal) @@ -208,16 +265,33 @@ func (t *translateContext) retainLocalAbsoluteFilePath(literal, localFullPath, l return localFullPath, nil } -func (t *translateContext) translateNoOpWithPrefix(literal, localFullPath, localRelPath, remotePath string) (string, error) { +func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) { + info, err := os.Stat(filepath.FromSlash(localFullPath)) + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("directory %s not found", literal) + } + if err != nil { + return "", fmt.Errorf("unable to determine if %s is a directory: %w", filepath.FromSlash(localFullPath), err) + } + if !info.IsDir() { + return "", fmt.Errorf("expected %s to be a directory but found a file", literal) + } + return localFullPath, nil +} + +func (t *translateContext) translateLocalRelativePath(ctx 
context.Context, literal, localFullPath, localRelPath string) (string, error) { + return localRelPath, nil +} + +func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { - localRelPath = "." + string(filepath.Separator) + localRelPath + localRelPath = "./" + localRelPath } return localRelPath, nil } -func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, dir string) (dyn.Value, error) { - out := v.MustString() - err := t.rewritePath(dir, &out, fn) +func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.Value, dir string, opts translateOptions) (dyn.Value, error) { + out, err := t.rewritePath(ctx, dir, v.MustString(), opts) if err != nil { if target := (&ErrIsNotebook{}); errors.As(err, target) { return dyn.InvalidValue, fmt.Errorf(`expected a file for "%s" but got a notebook: %w`, p, target) @@ -228,42 +302,38 @@ func (t *translateContext) rewriteValue(p dyn.Path, v dyn.Value, fn rewriteFunc, return dyn.InvalidValue, err } + // If the path was not rewritten, return the original value. + if out == "" { + return v, nil + } + return dyn.NewValue(out, v.Locations()), nil } -func (t *translateContext) rewriteRelativeTo(p dyn.Path, v dyn.Value, fn rewriteFunc, dir, fallback string) (dyn.Value, error) { - nv, err := t.rewriteValue(p, v, fn, dir) - if err == nil { - return nv, nil - } - - // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. - if fallback != "" { - nv, nerr := t.rewriteValue(p, v, fn, fallback) - if nerr == nil { - // TODO: Emit a warning that this path should be rewritten. - return nv, nil - } - } - - return dyn.InvalidValue, err -} - -func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *translatePaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { t := &translateContext{ b: b, seen: make(map[string]string), } + // Set the remote root to the sync root if source-linked deployment is enabled. + // Otherwise, set it to the workspace file path. + if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + t.remoteRoot = t.b.SyncRootPath + } else { + t.remoteRoot = t.b.Config.Workspace.FilePath + } + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { var err error - for _, fn := range []func(dyn.Value) (dyn.Value, error){ + for _, fn := range []func(context.Context, dyn.Value) (dyn.Value, error){ t.applyJobTranslations, t.applyPipelineTranslations, t.applyArtifactTranslations, t.applyDashboardTranslations, + t.applyAppsTranslations, } { - v, err = fn(v) + v, err = fn(ctx, v) if err != nil { return dyn.InvalidValue, err } @@ -274,6 +344,8 @@ func (m *translatePaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos return diag.FromErr(err) } +// gatherFallbackPaths collects the fallback paths for relative paths in the configuration. +// Read more about the motivation for this functionality in the "fallback" path translation tests. 
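TranslateModeLocalRelativeWithPrefix above keeps paths relative but prepends "./" so a local wheel cannot be confused with a PyPI requirement. A small illustration of the resulting dependency strings (the sample values and the .whl check are only for the demo; the CLI uses its own local-path detection):

package main

import (
	"fmt"
	"strings"
)

// withPrefix mirrors translateLocalRelativeWithPrefixPath above.
func withPrefix(localRelPath string) string {
	if !strings.HasPrefix(localRelPath, ".") {
		localRelPath = "./" + localRelPath
	}
	return localRelPath
}

func main() {
	deps := []string{"dist/env2.whl", "./job/dist/env1.whl", "simplejson"}
	for _, d := range deps {
		if strings.HasSuffix(d, ".whl") {
			fmt.Println(withPrefix(d)) // ./dist/env2.whl, ./job/dist/env1.whl
		} else {
			fmt.Println(d) // a package name is left untouched
		}
	}
}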
func gatherFallbackPaths(v dyn.Value, typ string) (map[string]string, error) { fallback := make(map[string]string) pattern := dyn.NewPattern(dyn.Key("resources"), dyn.Key(typ), dyn.AnyKey()) diff --git a/bundle/config/mutator/translate_paths_apps.go b/bundle/config/mutator/translate_paths_apps.go new file mode 100644 index 000000000..6117ee43f --- /dev/null +++ b/bundle/config/mutator/translate_paths_apps.go @@ -0,0 +1,33 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/libs/dyn" +) + +func (t *translateContext) applyAppsTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { + // Convert the `source_code_path` field to a remote absolute path. + // We use this path for app deployment to point to the source code. + pattern := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("apps"), + dyn.AnyKey(), + dyn.Key("source_code_path"), + ) + + opts := translateOptions{ + Mode: TranslateModeDirectory, + } + + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + key := p[2].Key() + dir, err := v.Location().Directory() + if err != nil { + return dyn.InvalidValue, fmt.Errorf("unable to determine directory for app %s: %w", key, err) + } + + return t.rewriteValue(ctx, p, v, dir, opts) + }) +} diff --git a/bundle/config/mutator/translate_paths_apps_test.go b/bundle/config/mutator/translate_paths_apps_test.go new file mode 100644 index 000000000..5692934b8 --- /dev/null +++ b/bundle/config/mutator/translate_paths_apps_test.go @@ -0,0 +1,57 @@ +package mutator_test + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTranslatePathsApps_FilePathRelativeSubDirectory(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "src", "app", "app.py")) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/bundle/files", + }, + Resources: config.Resources{ + Apps: map[string]*resources.App{ + "app": { + App: &apps.App{ + Name: "My App", + }, + SourceCodePath: "../src/app", + }, + }, + }, + }, + } + + bundletest.SetLocation(b, "resources.apps", []dyn.Location{{ + File: filepath.Join(dir, "resources/app.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the file path for the app has been converted to its local absolute path. 
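The app test above asserts that a relative source_code_path is rewritten to a workspace path under workspace.file_path. A standalone sketch of that computation (the local directories are invented; the real code goes through rewritePath with TranslateModeDirectory):

package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	syncRoot := "/home/user/bundle"                   // bundle sync root (illustrative)
	definedIn := filepath.Join(syncRoot, "resources") // directory of the file defining the app
	sourceCodePath := "../src/app"                    // value from the configuration
	remoteRoot := "/bundle/files"                     // ${workspace.file_path}

	local := filepath.Join(definedIn, sourceCodePath)
	rel, err := filepath.Rel(syncRoot, local)
	if err != nil {
		panic(err)
	}
	fmt.Println(path.Join(remoteRoot, filepath.ToSlash(rel))) // /bundle/files/src/app
}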
+ assert.Equal( + t, + "/bundle/files/src/app", + b.Config.Resources.Apps["app"].SourceCodePath, + ) +} diff --git a/bundle/config/mutator/translate_paths_artifacts.go b/bundle/config/mutator/translate_paths_artifacts.go index 921c00c73..8e864073f 100644 --- a/bundle/config/mutator/translate_paths_artifacts.go +++ b/bundle/config/mutator/translate_paths_artifacts.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type artifactRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { @@ -22,12 +23,18 @@ func (t *translateContext) artifactRewritePatterns() []artifactRewritePattern { return []artifactRewritePattern{ { base.Append(dyn.Key("path")), - t.translateNoOp, + translateOptions{ + Mode: TranslateModeLocalAbsoluteDirectory, + + // Artifact paths may be outside the sync root. + // They are the working directory for artifact builds. + AllowPathOutsideSyncRoot: true, + }, }, } } -func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyArtifactTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error for _, rewritePattern := range t.artifactRewritePatterns() { @@ -38,7 +45,7 @@ func (t *translateContext) applyArtifactTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for artifact %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, "") + return t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_artifacts_test.go b/bundle/config/mutator/translate_paths_artifacts_test.go new file mode 100644 index 000000000..0d1af6156 --- /dev/null +++ b/bundle/config/mutator/translate_paths_artifacts_test.go @@ -0,0 +1,83 @@ +package mutator_test + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTranslatePathsArtifacts_InsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + dir := filepath.Join(tmp, "bundle") + lib := filepath.Join(dir, "my_lib") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir to the sync root. + Path: "../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. 
+ assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} + +func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { + tmp := t.TempDir() + lib := filepath.Join(tmp, "my_lib") + dir := filepath.Join(tmp, "bundle") + _ = os.MkdirAll(lib, 0o755) + _ = os.MkdirAll(dir, 0o755) + + b := &bundle.Bundle{ + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), + Config: config.Root{ + Artifacts: map[string]*config.Artifact{ + "my_artifact": { + Type: "wheel", + + // Assume this is defined in a subdir of the bundle root. + Path: "../../my_lib", + }, + }, + }, + } + + bundletest.SetLocation(b, "artifacts", []dyn.Location{{ + File: filepath.Join(dir, "config/artifacts.yml"), + }}) + + diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) + require.NoError(t, diags.Error()) + + // Assert that the artifact path has been converted to a local absolute path. + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) +} diff --git a/bundle/config/mutator/translate_paths_dashboards.go b/bundle/config/mutator/translate_paths_dashboards.go index 93822a599..18c4c12e2 100644 --- a/bundle/config/mutator/translate_paths_dashboards.go +++ b/bundle/config/mutator/translate_paths_dashboards.go @@ -1,12 +1,13 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyDashboardTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { // Convert the `file_path` field to a local absolute path. // We load the file at this path and use its contents for the dashboard contents. pattern := dyn.NewPattern( @@ -16,6 +17,10 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e dyn.Key("file_path"), ) + opts := translateOptions{ + Mode: TranslateModeLocalAbsoluteFile, + } + return dyn.MapByPattern(v, pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { key := p[2].Key() dir, err := v.Location().Directory() @@ -23,6 +28,6 @@ func (t *translateContext) applyDashboardTranslations(v dyn.Value) (dyn.Value, e return dyn.InvalidValue, fmt.Errorf("unable to determine directory for dashboard %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, t.retainLocalAbsoluteFilePath, dir, "") + return t.rewriteValue(ctx, p, v, dir, opts) }) } diff --git a/bundle/config/mutator/translate_paths_dashboards_test.go b/bundle/config/mutator/translate_paths_dashboards_test.go index 5e4e69f5d..02fba92e0 100644 --- a/bundle/config/mutator/translate_paths_dashboards_test.go +++ b/bundle/config/mutator/translate_paths_dashboards_test.go @@ -48,7 +48,7 @@ func TestTranslatePathsDashboards_FilePathRelativeSubDirectory(t *testing.T) { // Assert that the file path for the dashboard has been converted to its local absolute path. 
assert.Equal( t, - filepath.Join(dir, "src", "my_dashboard.lvdash.json"), + filepath.ToSlash(filepath.Join(dir, "src", "my_dashboard.lvdash.json")), b.Config.Resources.Dashboards["dashboard"].FilePath, ) } diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index c29ff0ea9..148ed4466 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "slices" @@ -9,7 +10,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) -func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyJobTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "jobs") @@ -38,28 +39,48 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) } - rewritePatternFn, err := t.getRewritePatternFn(kind) + mode, err := getJobTranslateMode(kind) if err != nil { return dyn.InvalidValue, err } - return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + opts := translateOptions{ + Mode: mode, + } + + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. + // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. 
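Several expectations in the tests around this change switch from filepath.Join (OS-specific separators) to literal forward-slash strings: rewritePath now normalizes the relative path with filepath.ToSlash and joins remote paths with path.Join, so results are identical on Windows and Unix. A short illustration (the sample inputs are arbitrary):

package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	// filepath.Join uses the OS separator; ToSlash normalizes it away.
	localRelPath := filepath.ToSlash(filepath.Join("dist", "task.whl"))
	fmt.Println(localRelPath) // "dist/task.whl" on every OS

	// Remote workspace paths are always slash-separated, so path.Join is used.
	fmt.Println(path.Join("/Workspace/Users/foo@bar.com/files", localRelPath))
}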
+ return nv, nil + } + } + + return dyn.InvalidValue, err }) } -func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) { +func getJobTranslateMode(kind paths.PathKind) (TranslateMode, error) { switch kind { case paths.PathKindLibrary: - return t.translateNoOp, nil + return TranslateModeLocalRelative, nil case paths.PathKindNotebook: - return t.translateNotebookPath, nil + return TranslateModeNotebook, nil case paths.PathKindWorkspaceFile: - return t.translateFilePath, nil + return TranslateModeFile, nil case paths.PathKindDirectory: - return t.translateDirectoryPath, nil + return TranslateModeDirectory, nil case paths.PathKindWithPrefix: - return t.translateNoOpWithPrefix, nil + return TranslateModeLocalRelativeWithPrefix, nil } - return nil, fmt.Errorf("unsupported path kind: %d", kind) + return TranslateMode(0), fmt.Errorf("unsupported path kind: %d", kind) } diff --git a/bundle/config/mutator/translate_paths_pipelines.go b/bundle/config/mutator/translate_paths_pipelines.go index 71a65e846..204808ff5 100644 --- a/bundle/config/mutator/translate_paths_pipelines.go +++ b/bundle/config/mutator/translate_paths_pipelines.go @@ -1,6 +1,7 @@ package mutator import ( + "context" "fmt" "github.com/databricks/cli/libs/dyn" @@ -8,7 +9,7 @@ import ( type pipelineRewritePattern struct { pattern dyn.Pattern - fn rewriteFunc + opts translateOptions } func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { @@ -25,16 +26,16 @@ func (t *translateContext) pipelineRewritePatterns() []pipelineRewritePattern { return []pipelineRewritePattern{ { base.Append(dyn.Key("notebook"), dyn.Key("path")), - t.translateNotebookPath, + translateOptions{Mode: TranslateModeNotebook}, }, { base.Append(dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, + translateOptions{Mode: TranslateModeFile}, }, } } -func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, error) { +func (t *translateContext) applyPipelineTranslations(ctx context.Context, v dyn.Value) (dyn.Value, error) { var err error fallback, err := gatherFallbackPaths(v, "pipelines") @@ -50,7 +51,23 @@ func (t *translateContext) applyPipelineTranslations(v dyn.Value) (dyn.Value, er return dyn.InvalidValue, fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err) } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) + // Try to rewrite the path relative to the directory of the configuration file where the value was defined. + nv, err := t.rewriteValue(ctx, p, v, dir, rewritePattern.opts) + if err == nil { + return nv, nil + } + + // If we failed to rewrite the path, try to rewrite it relative to the fallback directory. + // We only do this for jobs and pipelines because of the comment in [gatherFallbackPaths]. + if fallback[key] != "" { + nv, nerr := t.rewriteValue(ctx, p, v, fallback[key], rewritePattern.opts) + if nerr == nil { + // TODO: Emit a warning that this path should be rewritten. 
+ return nv, nil + } + } + + return dyn.InvalidValue, err }) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 493abb8c5..aa6488ab0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "github.com/databricks/cli/bundle" @@ -226,7 +225,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -251,7 +250,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, ) assert.Equal( @@ -362,7 +361,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) assert.Equal( t, - filepath.Join("job", "dist", "task.jar"), + "job/dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar, ) assert.Equal( @@ -774,8 +773,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) - assert.Equal(t, strings.Join([]string{".", "job", "dist", "env1.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) - assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) + assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) + assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) @@ -839,7 +838,7 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { assert.Equal( t, - filepath.Join("variables", "local", "whl.whl"), + "variables/local/whl.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } @@ -952,34 +951,34 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { // updated to source path assert.Equal( t, - filepath.Join(dir, "my_job_notebook"), + dir+"/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, ) assert.Equal( t, - filepath.Join(dir, "requirements.txt"), + dir+"/requirements.txt", b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, ) assert.Equal( t, - filepath.Join(dir, "my_pipeline_notebook"), + dir+"/my_pipeline_notebook", b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, ) // left as is assert.Equal( t, - filepath.Join("dist", "task.whl"), + 
"dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -989,7 +988,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, ) assert.Equal( diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 13cf0d462..1f523fed3 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -23,6 +23,7 @@ type Resources struct { Volumes map[string]*resources.Volume `json:"volumes,omitempty"` Clusters map[string]*resources.Cluster `json:"clusters,omitempty"` Dashboards map[string]*resources.Dashboard `json:"dashboards,omitempty"` + Apps map[string]*resources.App `json:"apps,omitempty"` } type ConfigResource interface { @@ -87,6 +88,7 @@ func (r *Resources) AllResources() []ResourceGroup { collectResourceMap(descriptions["clusters"], r.Clusters), collectResourceMap(descriptions["dashboards"], r.Dashboards), collectResourceMap(descriptions["volumes"], r.Volumes), + collectResourceMap(descriptions["apps"], r.Apps), } } @@ -97,12 +99,19 @@ func (r *Resources) FindResourceByConfigKey(key string) (ConfigResource, error) found = append(found, r.Jobs[k]) } } + for k := range r.Pipelines { if k == key { found = append(found, r.Pipelines[k]) } } + for k := range r.Apps { + if k == key { + found = append(found, r.Apps[k]) + } + } + if len(found) == 0 { return nil, fmt.Errorf("no such resource: %s", key) } @@ -126,76 +135,96 @@ type ResourceDescription struct { // Singular and plural title when used in summaries / terminal UI. SingularTitle string PluralTitle string + + TerraformResourceName string } // The keys of the map corresponds to the resource key in the bundle configuration. 
func SupportedResources() map[string]ResourceDescription { return map[string]ResourceDescription{ "jobs": { - SingularName: "job", - PluralName: "jobs", - SingularTitle: "Job", - PluralTitle: "Jobs", + SingularName: "job", + PluralName: "jobs", + SingularTitle: "Job", + PluralTitle: "Jobs", + TerraformResourceName: "databricks_job", }, "pipelines": { - SingularName: "pipeline", - PluralName: "pipelines", - SingularTitle: "Pipeline", - PluralTitle: "Pipelines", + SingularName: "pipeline", + PluralName: "pipelines", + SingularTitle: "Pipeline", + PluralTitle: "Pipelines", + TerraformResourceName: "databricks_pipeline", }, "models": { - SingularName: "model", - PluralName: "models", - SingularTitle: "Model", - PluralTitle: "Models", + SingularName: "model", + PluralName: "models", + SingularTitle: "Model", + PluralTitle: "Models", + TerraformResourceName: "databricks_mlflow_model", }, "experiments": { - SingularName: "experiment", - PluralName: "experiments", - SingularTitle: "Experiment", - PluralTitle: "Experiments", + SingularName: "experiment", + PluralName: "experiments", + SingularTitle: "Experiment", + PluralTitle: "Experiments", + TerraformResourceName: "databricks_mlflow_experiment", }, "model_serving_endpoints": { - SingularName: "model_serving_endpoint", - PluralName: "model_serving_endpoints", - SingularTitle: "Model Serving Endpoint", - PluralTitle: "Model Serving Endpoints", + SingularName: "model_serving_endpoint", + PluralName: "model_serving_endpoints", + SingularTitle: "Model Serving Endpoint", + PluralTitle: "Model Serving Endpoints", + TerraformResourceName: "databricks_model_serving_endpoint", }, "registered_models": { - SingularName: "registered_model", - PluralName: "registered_models", - SingularTitle: "Registered Model", - PluralTitle: "Registered Models", + SingularName: "registered_model", + PluralName: "registered_models", + SingularTitle: "Registered Model", + PluralTitle: "Registered Models", + TerraformResourceName: "databricks_registered_model", }, "quality_monitors": { - SingularName: "quality_monitor", - PluralName: "quality_monitors", - SingularTitle: "Quality Monitor", - PluralTitle: "Quality Monitors", + SingularName: "quality_monitor", + PluralName: "quality_monitors", + SingularTitle: "Quality Monitor", + PluralTitle: "Quality Monitors", + TerraformResourceName: "databricks_quality_monitor", }, "schemas": { - SingularName: "schema", - PluralName: "schemas", - SingularTitle: "Schema", - PluralTitle: "Schemas", + SingularName: "schema", + PluralName: "schemas", + SingularTitle: "Schema", + PluralTitle: "Schemas", + TerraformResourceName: "databricks_schema", }, "clusters": { - SingularName: "cluster", - PluralName: "clusters", - SingularTitle: "Cluster", - PluralTitle: "Clusters", + SingularName: "cluster", + PluralName: "clusters", + SingularTitle: "Cluster", + PluralTitle: "Clusters", + TerraformResourceName: "databricks_cluster", }, "dashboards": { - SingularName: "dashboard", - PluralName: "dashboards", - SingularTitle: "Dashboard", - PluralTitle: "Dashboards", + SingularName: "dashboard", + PluralName: "dashboards", + SingularTitle: "Dashboard", + PluralTitle: "Dashboards", + TerraformResourceName: "databricks_dashboard", }, "volumes": { - SingularName: "volume", - PluralName: "volumes", - SingularTitle: "Volume", - PluralTitle: "Volumes", + SingularName: "volume", + PluralName: "volumes", + SingularTitle: "Volume", + PluralTitle: "Volumes", + TerraformResourceName: "databricks_volume", + }, + "apps": { + SingularName: "app", + PluralName: "apps", + 
SingularTitle: "App", + PluralTitle: "Apps", + TerraformResourceName: "databricks_app", }, } } diff --git a/bundle/config/resources/apps.go b/bundle/config/resources/apps.go new file mode 100644 index 000000000..809e04896 --- /dev/null +++ b/bundle/config/resources/apps.go @@ -0,0 +1,70 @@ +package resources + +import ( + "context" + "net/url" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +type App struct { + // SourceCodePath is a required field used by DABs to point to Databricks app source code + // on local disk and to the corresponding workspace path during app deployment. + SourceCodePath string `json:"source_code_path"` + + // Config is an optional field which allows configuring the app following Databricks app configuration format like in app.yml. + // When this field is set, DABs read the configuration set in this field and write + // it to app.yml in the root of the source code folder in Databricks workspace. + // If there’s app.yml defined locally, DABs will raise an error. + Config map[string]any `json:"config,omitempty"` + + Permissions []Permission `json:"permissions,omitempty"` + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` + URL string `json:"url,omitempty" bundle:"internal"` + + *apps.App +} + +func (a *App) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, a) +} + +func (a App) MarshalJSON() ([]byte, error) { + return marshal.Marshal(a) +} + +func (a *App) Exists(ctx context.Context, w *databricks.WorkspaceClient, name string) (bool, error) { + _, err := w.Apps.GetByName(ctx, name) + if err != nil { + log.Debugf(ctx, "app %s does not exist", name) + return false, err + } + return true, nil +} + +func (a *App) TerraformResourceName() string { + return "databricks_app" +} + +func (a *App) InitializeURL(baseURL url.URL) { + if a.ModifiedStatus == "" || a.ModifiedStatus == ModifiedStatusCreated { + return + } + baseURL.Path = "apps/" + a.Name + a.URL = baseURL.String() +} + +func (a *App) GetName() string { + return a.Name +} + +func (a *App) GetURL() string { + return a.URL +} + +func (a *App) IsNil() bool { + return a.App == nil +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 91c15fd9d..b974bcec5 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -47,8 +47,8 @@ type Root struct { // Targets can be used to differentiate settings and resources between // bundle deployment targets (e.g. development, staging, production). - // If not specified, the code below initializes this field with a - // single default-initialized target called "default". + // Note that this field is set to 'nil' by the SelectTarget mutator; + // use bundle.Bundle.Target to access the selected target configuration. Targets map[string]*Target `json:"targets,omitempty"` // DEPRECATED. Left for backward compatibility with Targets @@ -388,14 +388,6 @@ func (r *Root) MergeTargetOverrides(name string) error { return err } - // If the branch was overridden, we need to clear the inferred flag. - if branch := v.Get("branch"); branch.Kind() != dyn.KindInvalid { - out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.V(false)) - if err != nil { - return err - } - } - // Set the merged value. 
root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git")), out) if err != nil { diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index f5787a81d..04acd28ab 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -47,15 +47,13 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di var errs errgroup.Group var diags diag.Diagnostics - for i, pattern := range patterns { - index := i - fullPattern := pattern + for index, pattern := range patterns { // If the pattern is negated, strip the negation prefix // and check if the pattern matches any files. // Negation in gitignore syntax means "don't look at this path' // So if p matches nothing it's useless negation, but if there are matches, // it means: do not include these files into result set - p := strings.TrimPrefix(fullPattern, "!") + p := strings.TrimPrefix(pattern, "!") errs.Go(func() error { fs, err := fileset.NewGlobSet(rb.BundleRoot(), []string{p}) if err != nil { @@ -72,7 +70,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di mu.Lock() diags = diags.Append(diag.Diagnostic{ Severity: diag.Warning, - Summary: fmt.Sprintf("Pattern %s does not match any files", fullPattern), + Summary: fmt.Sprintf("Pattern %s does not match any files", pattern), Locations: []dyn.Location{loc.Location()}, Paths: []dyn.Path{loc.Path()}, }) diff --git a/bundle/config/variable/variable.go b/bundle/config/variable/variable.go index 95a68cfeb..d7f1cdede 100644 --- a/bundle/config/variable/variable.go +++ b/bundle/config/variable/variable.go @@ -36,11 +36,12 @@ type Variable struct { // This field stores the resolved value for the variable. The variable are // resolved in the following priority order (from highest to lowest) // - // 1. Command line flag. For example: `--var="foo=bar"` - // 2. Target variable. eg: BUNDLE_VAR_foo=bar - // 3. Default value as defined in the applicable environments block - // 4. Default value defined in variable definition - // 5. Throw error, since if no default value is defined, then the variable + // 1. Command line flag `--var="foo=bar"` + // 2. Environment variable. eg: BUNDLE_VAR_foo=bar + // 3. Load defaults from .databricks/bundle//variable-overrides.json + // 4. Default value as defined in the applicable targets block + // 5. Default value defined in variable definition + // 6. 
Throw error, since if no default value is defined, then the variable // is required Value VariableValue `json:"value,omitempty" bundle:"readonly"` diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index b47baa6b2..633d97081 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -54,6 +54,7 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { // Set file upload destination of the bundle in metadata b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath + // In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { b.Metadata.Config.Workspace.FilePath = b.SyncRootPath } diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index b710c690f..d549b9797 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform/tfdyn" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/service/apps" tfjson "github.com/hashicorp/terraform-json" ) @@ -196,6 +197,20 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.Dashboards[resource.Name] = cur + case "databricks_app": + if config.Resources.Apps == nil { + config.Resources.Apps = make(map[string]*resources.App) + } + cur := config.Resources.Apps[resource.Name] + if cur == nil { + cur = &resources.App{ModifiedStatus: resources.ModifiedStatusDeleted, App: &apps.App{}} + } else { + // If the app exists in terraform and bundle, we always set modified status to updated + // because we don't really know if the app source code was updated or not. + cur.ModifiedStatus = resources.ModifiedStatusUpdated + } + cur.Name = instance.Attributes.Name + config.Resources.Apps[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. 
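
The checkPatterns cleanup in validate_sync_patterns.go above works because, since Go 1.22, range loop variables are scoped per iteration, so closures handed to errs.Go no longer need manual copies of index and pattern. A minimal stand-alone sketch of that semantics change, assuming a toolchain of Go 1.22 or newer; the sample patterns are illustrative only:

package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	for index, pattern := range []string{"!foo/*", "bar/**", "*.whl"} {
		wg.Add(1)
		// Since Go 1.22, index and pattern are fresh variables on every
		// iteration, so capturing them in this goroutine closure is safe
		// without first copying them into per-iteration locals.
		go func() {
			defer wg.Done()
			fmt.Println(index, pattern)
		}()
	}
	wg.Wait()
}
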
@@ -260,6 +275,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Apps { + if src.ModifiedStatus == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index ccfdcece3..ffe55db71 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/dashboards" @@ -694,6 +695,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "app1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -732,6 +741,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Dashboards["test_dashboard"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Dashboards["test_dashboard"].ModifiedStatus) + assert.Equal(t, "app1", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Apps["test_app"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -815,6 +827,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Apps: map[string]*resources.App{ + "test_app": { + App: &apps.App{ + Description: "test_app", + }, + }, + }, }, } tfState := resourcesState{ @@ -856,6 +875,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.Dashboards["test_dashboard"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Dashboards["test_dashboard"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Apps["test_app"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -994,6 +1016,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Apps: map[string]*resources.App{ + "test_app": { + App: &apps.App{ + Name: "test_app", + }, + }, + "test_app_new": { + App: &apps.App{ + Name: "test_app_new", + }, + }, + }, }, } tfState := resourcesState{ @@ -1174,6 +1208,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "2"}}, }, }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "test_app"}}, + }, + }, + { + Type: "databricks_app", + Mode: "managed", + Name: "test_app_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{Name: "test_app_old"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1256,6 +1306,13 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, "", config.Resources.Dashboards["test_dashboard_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, 
config.Resources.Dashboards["test_dashboard_new"].ModifiedStatus) + assert.Equal(t, "test_app", config.Resources.Apps["test_app"].Name) + assert.Equal(t, resources.ModifiedStatusUpdated, config.Resources.Apps["test_app"].ModifiedStatus) + assert.Equal(t, "test_app_old", config.Resources.Apps["test_app_old"].Name) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Apps["test_app_old"].ModifiedStatus) + assert.Equal(t, "test_app_new", config.Resources.Apps["test_app_new"].Name) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Apps["test_app_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index d982354e1..5957611a4 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -54,7 +54,7 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } - binDir, err := b.CacheDir(context.Background(), "bin") + binDir, err := b.CacheDir(ctx, "bin") if err != nil { return "", err } @@ -88,41 +88,43 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con return tf.ExecPath, nil } -// This function inherits some environment variables for Terraform CLI. -func inheritEnvVars(ctx context.Context, environ map[string]string) error { +var envCopy = []string{ // Include $HOME in set of environment variables to pass along. - home, ok := env.Lookup(ctx, "HOME") - if ok { - environ["HOME"] = home - } + "HOME", // Include $USERPROFILE in set of environment variables to pass along. // This variable is used by Azure CLI on Windows to find stored credentials and metadata - userProfile, ok := env.Lookup(ctx, "USERPROFILE") - if ok { - environ["USERPROFILE"] = userProfile - } + "USERPROFILE", // Include $PATH in set of environment variables to pass along. // This is necessary to ensure that our Terraform provider can use the // same auxiliary programs (e.g. `az`, or `gcloud`) as the CLI. - path, ok := env.Lookup(ctx, "PATH") - if ok { - environ["PATH"] = path - } + "PATH", // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. // This is set in Azure DevOps by the AzureCLI@2 task. - azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") - if ok { - environ["AZURE_CONFIG_FILE"] = azureConfigFile - } + "AZURE_CONFIG_FILE", // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. // See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration - devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE") - if ok { - environ["TF_CLI_CONFIG_FILE"] = devConfigFile + "TF_CLI_CONFIG_FILE", + + // Include $USE_SDK_V2_RESOURCES and $USE_SDK_V2_DATA_SOURCES, these are used to switch back from plugin framework to SDKv2. + // This is used for mitigation issues with resource migrated to plugin framework, as recommended here: + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/troubleshooting#plugin-framework-migration-problems + // It is currently a workaround for deploying quality_monitors + // https://github.com/databricks/terraform-provider-databricks/issues/4229#issuecomment-2520344690 + "USE_SDK_V2_RESOURCES", + "USE_SDK_V2_DATA_SOURCES", +} + +// This function inherits some environment variables for Terraform CLI. 
+func inheritEnvVars(ctx context.Context, environ map[string]string) error { + for _, key := range envCopy { + value, ok := env.Lookup(ctx, key) + if ok { + environ[key] = value + } } // Map $DATABRICKS_TF_CLI_CONFIG_FILE to $TF_CLI_CONFIG_FILE diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index 813e6bbb7..719e6ad25 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -63,6 +63,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_cluster")).Append(path[2:]...) case dyn.Key("dashboards"): path = dyn.NewPath(dyn.Key("databricks_dashboard")).Append(path[2:]...) + case dyn.Key("apps"): + path = dyn.NewPath(dyn.Key("databricks_app")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index fc5c4d184..91a7bd54a 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -34,6 +34,7 @@ func TestInterpolate(t *testing.T) { "other_volume": "${resources.volumes.other_volume.id}", "other_cluster": "${resources.clusters.other_cluster.id}", "other_dashboard": "${resources.dashboards.other_dashboard.id}", + "other_app": "${resources.apps.other_app.id}", }, Tasks: []jobs.Task{ { @@ -73,6 +74,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_volume.other_volume.id}", j.Tags["other_volume"]) assert.Equal(t, "${databricks_cluster.other_cluster.id}", j.Tags["other_cluster"]) assert.Equal(t, "${databricks_dashboard.other_dashboard.id}", j.Tags["other_dashboard"]) + assert.Equal(t, "${databricks_app.other_app.id}", j.Tags["other_app"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/tfdyn/convert_app.go b/bundle/deploy/terraform/tfdyn/convert_app.go new file mode 100644 index 000000000..dcba0809b --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_app.go @@ -0,0 +1,55 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/apps" +) + +func convertAppResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Check if the description is not set and if it's not, set it to an empty string. + // This is done to avoid TF drift because Apps API return empty string for description when if it's not set. + if _, err := dyn.Get(vin, "description"); err != nil { + vin, err = dyn.Set(vin, "description", dyn.V("")) + if err != nil { + return vin, err + } + } + + // Normalize the output value to the target schema. + vout, diags := convert.Normalize(apps.App{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "app normalization diagnostic: %s", diag.Summary) + } + + return vout, nil +} + +type appConverter struct{} + +func (appConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertAppResource(ctx, vin) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.App[key] = vout.AsAny() + + // Configure permissions for this resource. 
+ if permissions := convertPermissionsResource(ctx, vin); permissions != nil { + permissions.AppName = fmt.Sprintf("${databricks_app.%s.name}", key) + out.Permissions["app_"+key] = permissions + } + + return nil +} + +func init() { + registerConverter("apps", appConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_app_test.go b/bundle/deploy/terraform/tfdyn/convert_app_test.go new file mode 100644 index 000000000..be8152cc6 --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_app_test.go @@ -0,0 +1,156 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertApp(t *testing.T) { + src := resources.App{ + SourceCodePath: "./app", + Config: map[string]any{ + "command": []string{"python", "app.py"}, + }, + App: &apps.App{ + Name: "app_id", + Description: "app description", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + Permission: "CAN_USE", + }, + }, + }, + }, + Permissions: []resources.Permission{ + { + Level: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + Level: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = appConverter{}.Convert(ctx, "my_app", vin, out) + require.NoError(t, err) + + app := out.App["my_app"] + assert.Equal(t, map[string]any{ + "description": "app description", + "name": "app_id", + "resources": []any{ + map[string]any{ + "name": "job1", + "job": map[string]any{ + "id": "1234", + "permission": "CAN_MANAGE_RUN", + }, + }, + map[string]any{ + "name": "sql1", + "sql_warehouse": map[string]any{ + "id": "5678", + "permission": "CAN_USE", + }, + }, + }, + }, app) + + // Assert equality on the permissions + assert.Equal(t, &schema.ResourcePermissions{ + AppName: "${databricks_app.my_app.name}", + AccessControl: []schema.ResourcePermissionsAccessControl{ + { + PermissionLevel: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + PermissionLevel: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + }, out.Permissions["app_my_app"]) +} + +func TestConvertAppWithNoDescription(t *testing.T) { + src := resources.App{ + SourceCodePath: "./app", + Config: map[string]any{ + "command": []string{"python", "app.py"}, + }, + App: &apps.App{ + Name: "app_id", + Resources: []apps.AppResource{ + { + Name: "job1", + Job: &apps.AppResourceJob{ + Id: "1234", + Permission: "CAN_MANAGE_RUN", + }, + }, + { + Name: "sql1", + SqlWarehouse: &apps.AppResourceSqlWarehouse{ + Id: "5678", + Permission: "CAN_USE", + }, + }, + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = appConverter{}.Convert(ctx, "my_app", vin, out) + require.NoError(t, err) + + app := out.App["my_app"] + assert.Equal(t, map[string]any{ + "name": "app_id", + "description": "", // Due to Apps API always returning a description field, we set it in the output as well to avoid permanent TF drift + "resources": []any{ + 
map[string]any{ + "name": "job1", + "job": map[string]any{ + "id": "1234", + "permission": "CAN_MANAGE_RUN", + }, + }, + map[string]any{ + "name": "sql1", + "sql_warehouse": map[string]any{ + "id": "5678", + "permission": "CAN_USE", + }, + }, + }, + }, app) +} diff --git a/bundle/deploy/terraform/util.go b/bundle/deploy/terraform/util.go index 4da015c23..90dfe37b2 100644 --- a/bundle/deploy/terraform/util.go +++ b/bundle/deploy/terraform/util.go @@ -33,7 +33,12 @@ type stateResourceInstance struct { } type stateInstanceAttributes struct { - ID string `json:"id"` + ID string `json:"id"` + + // Some resources such as Apps do not have an ID, so we use the name instead. + // We need this for cases when such resource is removed from bundle config but + // exists in the workspace still so we can correctly display its summary. + Name string `json:"name,omitempty"` ETag string `json:"etag,omitempty"` } diff --git a/bundle/deploy/terraform/util_test.go b/bundle/deploy/terraform/util_test.go index 74b329259..5d1310392 100644 --- a/bundle/deploy/terraform/util_test.go +++ b/bundle/deploy/terraform/util_test.go @@ -97,7 +97,7 @@ func TestParseResourcesStateWithExistingStateFile(t *testing.T) { Type: "databricks_pipeline", Name: "test_pipeline", Instances: []stateResourceInstance{ - {Attributes: stateInstanceAttributes{ID: "123"}}, + {Attributes: stateInstanceAttributes{ID: "123", Name: "test_pipeline"}}, }, }, }, diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index 5283a431b..e18de7896 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -55,7 +55,7 @@ github.com/databricks/cli/bundle/config.Bundle: The name of the bundle. "uuid": "description": |- - PLACEHOLDER + Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command). github.com/databricks/cli/bundle/config.Deployment: "fail_on_active_runs": "description": |- @@ -147,6 +147,9 @@ github.com/databricks/cli/bundle/config.Python: If enabled, Python code will execute within this environment. If disabled, it defaults to using the Python interpreter available in the current shell. github.com/databricks/cli/bundle/config.Resources: + "apps": + "description": |- + PLACEHOLDER "clusters": "description": |- The cluster definitions for the bundle. 
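
To illustrate the util.go change above: app instances in the Terraform state are keyed by name rather than a numeric id, so the attribute struct now also decodes the name field. A minimal sketch with the struct mirrored locally for illustration (the real, unexported type lives in bundle/deploy/terraform/util.go):

package main

import (
	"encoding/json"
	"fmt"
)

// Local mirror of stateInstanceAttributes, redefined here only for illustration.
type stateInstanceAttributes struct {
	ID   string `json:"id"`
	Name string `json:"name,omitempty"`
	ETag string `json:"etag,omitempty"`
}

func main() {
	// An app entry in the state carries a name but no id.
	raw := []byte(`{"name": "my_app"}`)
	var attrs stateInstanceAttributes
	if err := json.Unmarshal(raw, &attrs); err != nil {
		panic(err)
	}
	fmt.Printf("id=%q name=%q\n", attrs.ID, attrs.Name) // id="" name="my_app"
}
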
@@ -371,6 +374,64 @@ github.com/databricks/cli/bundle/config.Workspace: "state_path": "description": |- The workspace state path +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + PLACEHOLDER + "app_status": + "description": |- + PLACEHOLDER + "compute_status": + "description": |- + PLACEHOLDER + "config": + "description": |- + PLACEHOLDER + "create_time": + "description": |- + PLACEHOLDER + "creator": + "description": |- + PLACEHOLDER + "default_source_code_path": + "description": |- + PLACEHOLDER + "description": + "description": |- + PLACEHOLDER + "name": + "description": |- + PLACEHOLDER + "pending_deployment": + "description": |- + PLACEHOLDER + "permissions": + "description": |- + PLACEHOLDER + "resources": + "description": |- + PLACEHOLDER + "service_principal_client_id": + "description": |- + PLACEHOLDER + "service_principal_id": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER + "update_time": + "description": |- + PLACEHOLDER + "updater": + "description": |- + PLACEHOLDER + "url": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Grant: "principal": "description": |- @@ -459,3 +520,103 @@ github.com/databricks/cli/bundle/config/variable.Variable: "type": "description": |- The type of the variable. +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + PLACEHOLDER + "creator": + "description": |- + PLACEHOLDER + "deployment_artifacts": + "description": |- + PLACEHOLDER + "deployment_id": + "description": |- + PLACEHOLDER + "mode": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER + "status": + "description": |- + PLACEHOLDER + "update_time": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + PLACEHOLDER + "state": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": + "description": |- + PLACEHOLDER + "job": + "description": |- + PLACEHOLDER + "name": + "description": |- + PLACEHOLDER + "secret": + "description": |- + PLACEHOLDER + "serving_endpoint": + "description": |- + PLACEHOLDER + "sql_warehouse": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER + "scope": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + PLACEHOLDER + "permission": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + PLACEHOLDER + "state": + "description": |- + PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + PLACEHOLDER + 
"state": + "description": |- + PLACEHOLDER diff --git a/bundle/internal/schema/annotations_openapi.yml b/bundle/internal/schema/annotations_openapi.yml index e9c893c87..8ff5c9253 100644 --- a/bundle/internal/schema/annotations_openapi.yml +++ b/bundle/internal/schema/annotations_openapi.yml @@ -1,4 +1,47 @@ # This file is auto-generated. DO NOT EDIT. +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + The active deployment of the app. A deployment is considered active when it has been deployed + to the app compute. + "app_status": {} + "compute_status": {} + "create_time": + "description": |- + The creation time of the app. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user that created the app. + "default_source_code_path": + "description": |- + The default workspace file system path of the source code from which app deployment are + created. This field tracks the workspace source code path of the last active deployment. + "description": + "description": |- + The description of the app. + "name": + "description": |- + The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. + It must be unique within the workspace. + "pending_deployment": + "description": |- + The pending deployment of the app. A deployment is considered pending when it is being prepared + for deployment to the app compute. + "resources": + "description": |- + Resources for the app. + "service_principal_client_id": {} + "service_principal_id": {} + "service_principal_name": {} + "update_time": + "description": |- + The update time of the app. Formatted timestamp in ISO 6801. + "updater": + "description": |- + The email of the user that last updated the app. + "url": + "description": |- + The URL of the app once it is deployed. github.com/databricks/cli/bundle/config/resources.Cluster: "apply_policy_default_values": "description": |- @@ -220,6 +263,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "job_clusters": "description": |- A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. "max_concurrent_runs": "description": |- An optional maximum allowed number of concurrent runs of the job. @@ -250,6 +294,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "tasks": "description": |- A list of task specifications to be executed by this job. + If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. "timeout_seconds": "description": |- An optional timeout applied to each run of this job. A value of `0` means no timeout. @@ -489,6 +534,187 @@ github.com/databricks/cli/bundle/config/resources.Volume: "description": |- The storage location on the cloud "volume_type": {} +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + The creation time of the deployment. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user creates the deployment. + "deployment_artifacts": + "description": |- + The deployment artifacts for an app. + "deployment_id": + "description": |- + The unique id of the deployment. 
+ "mode": + "description": |- + The mode of which the deployment will manage the source code. + "source_code_path": + "description": |- + The workspace file system path of the source code used to create the app deployment. This is different from + `deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers + to the original source code location of the app in the workspace during deployment creation, whereas + the latter provides a system generated stable snapshotted source code path used by the deployment. + "status": + "description": |- + Status and status message of the deployment + "update_time": + "description": |- + The update time of the deployment. Formatted timestamp in ISO 6801. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + The snapshotted workspace file system path of the source code loaded by the deployed app. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode: + "_": + "enum": + - |- + SNAPSHOT + - |- + AUTO_SYNC +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState: + "_": + "enum": + - |- + SUCCEEDED + - |- + FAILED + - |- + IN_PROGRESS + - |- + CANCELLED +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + Message corresponding with the deployment state. + "state": + "description": |- + State of the deployment. +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": + "description": |- + Description of the App Resource. + "job": {} + "name": + "description": |- + Name of the App Resource. + "secret": {} + "serving_endpoint": {} + "sql_warehouse": {} +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + Id of the job to grant permission on. + "permission": + "description": |- + Permissions to grant on the Job. Supported permissions are: "CAN_MANAGE", "IS_OWNER", "CAN_MANAGE_RUN", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + IS_OWNER + - |- + CAN_MANAGE_RUN + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + Key of the secret to grant permission on. + "permission": + "description": |- + Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: "READ", "WRITE", "MANAGE". + "scope": + "description": |- + Scope of the secret to grant permission on. +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission: + "_": + "description": |- + Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". + "enum": + - |- + READ + - |- + WRITE + - |- + MANAGE +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + Name of the serving endpoint to grant permission on. + "permission": + "description": |- + Permission to grant on the serving endpoint. Supported permissions are: "CAN_MANAGE", "CAN_QUERY", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_QUERY + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + Id of the SQL warehouse to grant permission on. 
+ "permission": + "description": |- + Permission to grant on the SQL warehouse. Supported permissions are: "CAN_MANAGE", "CAN_USE", "IS_OWNER". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE + - |- + IS_OWNER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationState: + "_": + "enum": + - |- + DEPLOYING + - |- + RUNNING + - |- + CRASHED + - |- + UNAVAILABLE +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + Application status message + "state": + "description": |- + State of the application. +github.com/databricks/databricks-sdk-go/service/apps.ComputeState: + "_": + "enum": + - |- + ERROR + - |- + DELETING + - |- + STARTING + - |- + STOPPING + - |- + UPDATING + - |- + STOPPED + - |- + ACTIVE +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + Compute status message + "state": + "description": |- + State of the app compute. github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule: "pause_status": "description": |- @@ -2116,6 +2342,26 @@ github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag: github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: "quartz_cron_schedule": {} "timezone_id": {} +github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek: + "_": + "description": |- + Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). + If not specified all days of the week will be used. + "enum": + - |- + MONDAY + - |- + TUESDAY + - |- + WEDNESDAY + - |- + THURSDAY + - |- + FRIDAY + - |- + SATURDAY + - |- + SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind: "_": "description": | @@ -2375,26 +2621,6 @@ github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow: "description": |- Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. -github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek: - "_": - "description": |- - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). - If not specified all days of the week will be used. 
- "enum": - - |- - MONDAY - - |- - TUESDAY - - |- - WEDNESDAY - - |- - THURSDAY - - |- - FRIDAY - - |- - SATURDAY - - |- - SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec: "destination_catalog": "description": |- diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index ef602d6ef..120a12543 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -1,3 +1,28 @@ +github.com/databricks/cli/bundle/config/resources.App: + "app_status": + "description": |- + PLACEHOLDER + "compute_status": + "description": |- + PLACEHOLDER + "config": + "description": |- + PLACEHOLDER + "permissions": + "description": |- + PLACEHOLDER + "service_principal_client_id": + "description": |- + PLACEHOLDER + "service_principal_id": + "description": |- + PLACEHOLDER + "service_principal_name": + "description": |- + PLACEHOLDER + "source_code_path": + "description": |- + PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Cluster: "data_security_mode": "description": |- @@ -75,6 +100,19 @@ github.com/databricks/cli/bundle/config/resources.Volume: "volume_type": "description": |- PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "job": + "description": |- + PLACEHOLDER + "secret": + "description": |- + PLACEHOLDER + "serving_endpoint": + "description": |- + PLACEHOLDER + "sql_warehouse": + "description": |- + PLACEHOLDER github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: "availability": "description": |- diff --git a/bundle/internal/schema/main.go b/bundle/internal/schema/main.go index 77927a966..38e099ece 100644 --- a/bundle/internal/schema/main.go +++ b/bundle/internal/schema/main.go @@ -40,6 +40,19 @@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema. } } + // Allows using variables in enum fields + if s.Type == jsonschema.StringType && s.Enum != nil { + return jsonschema.Schema{ + OneOf: []jsonschema.Schema{ + s, + { + Type: jsonschema.StringType, + Pattern: interpolationPattern("var"), + }, + }, + } + } + switch s.Type { case jsonschema.ArrayType, jsonschema.ObjectType: // arrays and objects can have complex variable values specified. 
@@ -159,6 +172,15 @@ func generateSchema(workdir, outputFile string) { a.addAnnotations, addInterpolationPatterns, }) + + // AdditionalProperties is set to an empty schema to allow non-typed keys used as yaml-anchors + // Example: + // some_anchor: &some_anchor + // file_path: /some/path/ + // workspace: + // <<: *some_anchor + s.AdditionalProperties = jsonschema.Schema{} + if err != nil { log.Fatal(err) } diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml deleted file mode 100644 index e8a8866bc..000000000 --- a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml +++ /dev/null @@ -1 +0,0 @@ -unknown: value diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml index e13a52c03..ec447ba39 100644 --- a/bundle/internal/schema/testdata/pass/job.yml +++ b/bundle/internal/schema/testdata/pass/job.yml @@ -13,6 +13,8 @@ variables: simplevar: default: true description: "simplevar description" + schedule_status: + default: "PAUSED" complexvar: default: @@ -42,6 +44,8 @@ resources: dependencies: - python=3.7 client: "myclient" + trigger: + pause_status: ${var.schedule_status} tags: foo: bar bar: baz diff --git a/bundle/internal/schema/testdata/pass/yaml_anchors.yml b/bundle/internal/schema/testdata/pass/yaml_anchors.yml new file mode 100644 index 000000000..18749891d --- /dev/null +++ b/bundle/internal/schema/testdata/pass/yaml_anchors.yml @@ -0,0 +1,11 @@ +tags: &job-tags + environment: "some_environment" + +resources: + jobs: + db1: + tags: + <<: *job-tags + db2: + tags: + <<: *job-tags diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 27c4b16cd..677b8fc10 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" diff --git a/bundle/internal/tf/schema/resource_external_location.go b/bundle/internal/tf/schema/resource_external_location.go index da28271bc..72411f4dc 100644 --- a/bundle/internal/tf/schema/resource_external_location.go +++ b/bundle/internal/tf/schema/resource_external_location.go @@ -13,8 +13,13 @@ type ResourceExternalLocationEncryptionDetails struct { type ResourceExternalLocation struct { AccessPoint string `json:"access_point,omitempty"` + BrowseOnly bool `json:"browse_only,omitempty"` Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name"` + Fallback bool `json:"fallback,omitempty"` ForceDestroy bool `json:"force_destroy,omitempty"` ForceUpdate bool `json:"force_update,omitempty"` Id string `json:"id,omitempty"` @@ -24,6 +29,8 @@ type ResourceExternalLocation struct { Owner string `json:"owner,omitempty"` ReadOnly bool `json:"read_only,omitempty"` SkipValidation bool `json:"skip_validation,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` Url string `json:"url"` EncryptionDetails *ResourceExternalLocationEncryptionDetails `json:"encryption_details,omitempty"` } diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 1f89dc64d..7dd3f9210 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root 
struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" func NewRoot() *Root { return &Root{ diff --git a/bundle/permissions/mutator.go b/bundle/permissions/mutator.go index cd7cbf40c..8a0057dee 100644 --- a/bundle/permissions/mutator.go +++ b/bundle/permissions/mutator.go @@ -51,6 +51,10 @@ var ( CAN_MANAGE: "CAN_MANAGE", CAN_VIEW: "CAN_READ", }, + "apps": { + CAN_MANAGE: "CAN_MANAGE", + CAN_VIEW: "CAN_USE", + }, } ) diff --git a/bundle/permissions/mutator_test.go b/bundle/permissions/mutator_test.go index 15586e979..1f7897cae 100644 --- a/bundle/permissions/mutator_test.go +++ b/bundle/permissions/mutator_test.go @@ -58,6 +58,10 @@ func TestApplyBundlePermissions(t *testing.T) { "dashboard_1": {}, "dashboard_2": {}, }, + Apps: map[string]*resources.App{ + "app_1": {}, + "app_2": {}, + }, }, }, } @@ -114,6 +118,10 @@ func TestApplyBundlePermissions(t *testing.T) { require.Len(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, 2) require.Contains(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, resources.Permission{Level: "CAN_MANAGE", UserName: "TestUser"}) require.Contains(t, b.Config.Resources.Dashboards["dashboard_1"].Permissions, resources.Permission{Level: "CAN_READ", GroupName: "TestGroup"}) + + require.Len(t, b.Config.Resources.Apps["app_1"].Permissions, 2) + require.Contains(t, b.Config.Resources.Apps["app_1"].Permissions, resources.Permission{Level: "CAN_MANAGE", UserName: "TestUser"}) + require.Contains(t, b.Config.Resources.Apps["app_1"].Permissions, resources.Permission{Level: "CAN_USE", GroupName: "TestGroup"}) } func TestWarningOnOverlapPermission(t *testing.T) { diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 16595611f..b59ce9f89 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -5,6 +5,7 @@ import ( "errors" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/apps" "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" @@ -129,12 +130,15 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( bundle.Seq( + apps.SlowDeployMessage(), bundle.LogString("Deploying resources..."), terraform.Apply(), ), bundle.Seq( terraform.StatePush(), terraform.Load(), + apps.InterpolateVariables(), + apps.UploadConfig(), metadata.Compute(), metadata.Upload(), bundle.LogString("Deployment complete!"), diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index f7b3cd608..c5b875196 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -2,6 +2,7 @@ package phases import ( "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/apps" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" pythonmutator "github.com/databricks/cli/bundle/config/mutator/python" @@ -33,10 +34,6 @@ func Initialize() bundle.Mutator { // If it is an ancestor, this updates all paths to be relative to the sync root path. 
mutator.SyncInferRoot(), - mutator.MergeJobClusters(), - mutator.MergeJobParameters(), - mutator.MergeJobTasks(), - mutator.MergePipelineClusters(), mutator.InitializeWorkspaceClient(), mutator.PopulateCurrentUser(), mutator.LoadGitDetails(), @@ -64,12 +61,20 @@ func Initialize() bundle.Mutator { pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseApplyMutators), mutator.ResolveVariableReferencesInLookup(), mutator.ResolveResourceReferences(), - mutator.ResolveVariableReferencesInComplexVariables(), mutator.ResolveVariableReferences( "bundle", "workspace", "variables", ), + + mutator.MergeJobClusters(), + mutator.MergeJobParameters(), + mutator.MergeJobTasks(), + mutator.MergePipelineClusters(), + mutator.MergeApps(), + + mutator.CaptureSchemaDependency(), + // Provide permission config errors & warnings after initializing all variables permissions.PermissionDiagnostics(), mutator.SetRunAs(), @@ -87,6 +92,8 @@ func Initialize() bundle.Mutator { mutator.TranslatePaths(), trampoline.WrapperWarning(), + apps.Validate(), + permissions.ValidateSharedRootPermissions(), permissions.ApplyBundlePermissions(), permissions.FilterCurrentUser(), diff --git a/bundle/run/app.go b/bundle/run/app.go new file mode 100644 index 000000000..b15f3f4b6 --- /dev/null +++ b/bundle/run/app.go @@ -0,0 +1,244 @@ +package run + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/run/output" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func logProgress(ctx context.Context, msg string) { + if msg == "" { + return + } + cmdio.LogString(ctx, "✓ "+msg) +} + +type appRunner struct { + key + + bundle *bundle.Bundle + app *resources.App +} + +func (a *appRunner) Name() string { + if a.app == nil { + return "" + } + + return a.app.Name +} + +func isAppStopped(app *apps.App) bool { + return app.ComputeStatus == nil || + (app.ComputeStatus.State == apps.ComputeStateStopped || app.ComputeStatus.State == apps.ComputeStateError) +} + +func (a *appRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) { + app := a.app + b := a.bundle + if app == nil { + return nil, errors.New("app is not defined") + } + + logProgress(ctx, "Getting the status of the app "+app.Name) + w := b.WorkspaceClient() + + // Check the status of the app first. + createdApp, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name}) + if err != nil { + return nil, err + } + + if createdApp.AppStatus != nil { + logProgress(ctx, fmt.Sprintf("App is in %s state", createdApp.AppStatus.State)) + } + + if createdApp.ComputeStatus != nil { + logProgress(ctx, fmt.Sprintf("App compute is in %s state", createdApp.ComputeStatus.State)) + } + + // There could be 2 reasons why the app is not running: + // 1. The app is new and was never deployed yet. + // 2. The app was stopped (compute not running). + // We need to start the app only if the compute is not running. + if isAppStopped(createdApp) { + err := a.start(ctx) + if err != nil { + return nil, err + } + } + + // Deploy the app. 
+ err = a.deploy(ctx) + if err != nil { + return nil, err + } + + cmdio.LogString(ctx, "You can access the app at "+createdApp.Url) + return nil, nil +} + +func (a *appRunner) start(ctx context.Context) error { + app := a.app + b := a.bundle + w := b.WorkspaceClient() + + logProgress(ctx, "Starting the app "+app.Name) + wait, err := w.Apps.Start(ctx, apps.StartAppRequest{Name: app.Name}) + if err != nil { + return err + } + + startedApp, err := wait.OnProgress(func(p *apps.App) { + if p.AppStatus == nil { + return + } + logProgress(ctx, "App is starting...") + }).Get() + if err != nil { + return err + } + + // After the app is started (meaning the compute is running), the API will return the app object with the + // active and pending deployments fields (if any). If there are active or pending deployments, + // we need to wait for them to complete before we can do the new deployment. + // Otherwise, the new deployment will fail. + err = waitForDeploymentToComplete(ctx, w, startedApp) + if err != nil { + return err + } + + logProgress(ctx, "App is started!") + return nil +} + +func waitForDeploymentToComplete(ctx context.Context, w *databricks.WorkspaceClient, app *apps.App) error { + // We first wait for the active deployment to complete. + if app.ActiveDeployment != nil && + app.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { + logProgress(ctx, "Waiting for the active deployment to complete...") + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.ActiveDeployment.DeploymentId, 20*time.Minute, nil) + if err != nil { + return err + } + logProgress(ctx, "Active deployment is completed!") + } + + // Then, we wait for the pending deployment to complete. + if app.PendingDeployment != nil && + app.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { + logProgress(ctx, "Waiting for the pending deployment to complete...") + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.PendingDeployment.DeploymentId, 20*time.Minute, nil) + if err != nil { + return err + } + logProgress(ctx, "Pending deployment is completed!") + } + + return nil +} + +func (a *appRunner) deploy(ctx context.Context) error { + app := a.app + b := a.bundle + w := b.WorkspaceClient() + + sourceCodePath := app.SourceCodePath + wait, err := w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ + AppName: app.Name, + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: sourceCodePath, + }, + }) + // If deploy returns an error, then there's an active deployment in progress, wait for it to complete. + // For this we first need to get an app and its acrive and pending deployments and then wait for them. 
+ if err != nil {
+ app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name})
+ if err != nil {
+ return fmt.Errorf("failed to get app %s: %w", app.Name, err)
+ }
+
+ err = waitForDeploymentToComplete(ctx, w, app)
+ if err != nil {
+ return err
+ }
+
+ // Now we can try to deploy the app again.
+ wait, err = w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{
+ AppName: app.Name,
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: sourceCodePath,
+ },
+ })
+ if err != nil {
+ return err
+ }
+ }
+
+ _, err = wait.OnProgress(func(ad *apps.AppDeployment) {
+ if ad.Status == nil {
+ return
+ }
+ logProgress(ctx, ad.Status.Message)
+ }).Get()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (a *appRunner) Cancel(ctx context.Context) error {
+ // Cancel the app by stopping it.
+ app := a.app
+ b := a.bundle
+ if app == nil {
+ return errors.New("app is not defined")
+ }
+
+ w := b.WorkspaceClient()
+
+ logProgress(ctx, "Stopping app "+app.Name)
+ wait, err := w.Apps.Stop(ctx, apps.StopAppRequest{Name: app.Name})
+ if err != nil {
+ return err
+ }
+
+ _, err = wait.OnProgress(func(p *apps.App) {
+ if p.AppStatus == nil {
+ return
+ }
+ logProgress(ctx, p.AppStatus.Message)
+ }).Get()
+
+ logProgress(ctx, "App is stopped!")
+ return err
+}
+
+func (a *appRunner) Restart(ctx context.Context, opts *Options) (output.RunOutput, error) {
+ // Restart the app by running it again, which triggers a new app deployment.
+ return a.Run(ctx, opts)
+}
+
+func (a *appRunner) ParseArgs(args []string, opts *Options) error {
+ if len(args) == 0 {
+ return nil
+ }
+
+ return fmt.Errorf("received %d unexpected positional arguments", len(args))
+}
+
+func (a *appRunner) CompleteArgs(args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+ return nil, cobra.ShellCompDirectiveNoFileComp
+}
diff --git a/bundle/run/app_test.go b/bundle/run/app_test.go
new file mode 100644
index 000000000..77f197e8d
--- /dev/null
+++ b/bundle/run/app_test.go
@@ -0,0 +1,276 @@
+package run
+
+import (
+ "context"
+ "errors"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/databricks/cli/bundle"
+ "github.com/databricks/cli/bundle/config"
+ "github.com/databricks/cli/bundle/config/mutator"
+ "github.com/databricks/cli/bundle/config/resources"
+ "github.com/databricks/cli/bundle/internal/bundletest"
+ "github.com/databricks/cli/libs/cmdio"
+ "github.com/databricks/cli/libs/dyn"
+ "github.com/databricks/cli/libs/vfs"
+ "github.com/databricks/databricks-sdk-go/experimental/mocks"
+ "github.com/databricks/databricks-sdk-go/service/apps"
+ "github.com/stretchr/testify/mock"
+ "github.com/stretchr/testify/require"
+)
+
+type testAppRunner struct {
+ m *mocks.MockWorkspaceClient
+ b *bundle.Bundle
+ ctx context.Context
+}
+
+func (ta *testAppRunner) run(t *testing.T) {
+ r := appRunner{
+ key: "my_app",
+ bundle: ta.b,
+ app: ta.b.Config.Resources.Apps["my_app"],
+ }
+
+ _, err := r.Run(ta.ctx, &Options{})
+ require.NoError(t, err)
+}
+
+func setupBundle(t *testing.T) (context.Context, *bundle.Bundle, *mocks.MockWorkspaceClient) {
+ root := t.TempDir()
+ err := os.MkdirAll(filepath.Join(root, "my_app"), 0o700)
+ require.NoError(t, err)
+
+ b := &bundle.Bundle{
+ BundleRootPath: root,
+ SyncRoot: vfs.MustNew(root),
+ Config: config.Root{
+ Workspace: config.Workspace{
+ RootPath: "/Workspace/Users/foo@bar.com/",
+ },
+ Resources: config.Resources{
+ Apps: map[string]*resources.App{
+ "my_app": {
+ App: &apps.App{
+ Name:
"my_app", + }, + SourceCodePath: "./my_app", + Config: map[string]any{ + "command": []string{"echo", "hello"}, + "env": []map[string]string{ + {"name": "MY_APP", "value": "my value"}, + }, + }, + }, + }, + }, + }, + } + + mwc := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(mwc.WorkspaceClient) + bundletest.SetLocation(b, "resources.apps.my_app", []dyn.Location{{File: "./databricks.yml"}}) + + ctx := cmdio.MockDiscard(context.Background()) + + diags := bundle.Apply(ctx, b, bundle.Seq( + mutator.DefineDefaultWorkspacePaths(), + mutator.TranslatePaths(), + )) + require.Empty(t, diags) + + return ctx, b, mwc +} + +func setupTestApp(t *testing.T, initialAppState apps.ApplicationState, initialComputeState apps.ComputeState) *testAppRunner { + ctx, b, mwc := setupBundle(t) + + appApi := mwc.GetMockAppsAPI() + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: initialAppState, + }, + ComputeStatus: &apps.ComputeStatus{ + State: initialComputeState, + }, + }, nil) + + wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{ + Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) { + return nil, nil + }, + } + appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(wait, nil) + + return &testAppRunner{ + m: mwc, + b: b, + ctx: ctx, + } +} + +func TestAppRunStartedApp(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateRunning, apps.ComputeStateActive) + r.run(t) +} + +func TestAppRunStoppedApp(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateCrashed, apps.ComputeStateStopped) + + appsApi := r.m.GetMockAppsAPI() + appsApi.EXPECT().Start(mock.Anything, apps.StartAppRequest{ + Name: "my_app", + }).Return(&apps.WaitGetAppActive[apps.App]{ + Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) { + return &apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + }, nil + }, + }, nil) + + r.run(t) +} + +func TestAppRunWithAnActiveDeploymentInProgress(t *testing.T) { + r := setupTestApp(t, apps.ApplicationStateCrashed, apps.ComputeStateStopped) + + appsApi := r.m.GetMockAppsAPI() + appsApi.EXPECT().Start(mock.Anything, apps.StartAppRequest{ + Name: "my_app", + }).Return(&apps.WaitGetAppActive[apps.App]{ + Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) { + return &apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + ActiveDeployment: &apps.AppDeployment{ + DeploymentId: "active_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateInProgress, + }, + }, + PendingDeployment: &apps.AppDeployment{ + DeploymentId: "pending_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateCancelled, + }, + }, + }, nil + }, + }, nil) + + appsApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil) + + r.run(t) +} + +func TestAppDeployWithDeploymentInProgress(t *testing.T) { + ctx, b, mwc := setupBundle(t) + + appApi := 
mwc.GetMockAppsAPI()
+ appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{
+ Name: "my_app",
+ }).Return(&apps.App{
+ Name: "my_app",
+ AppStatus: &apps.ApplicationStatus{
+ State: apps.ApplicationStateRunning,
+ },
+ ComputeStatus: &apps.ComputeStatus{
+ State: apps.ComputeStateActive,
+ },
+ }, nil).Once()
+
+ wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{
+ Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) {
+ return nil, nil
+ },
+ }
+
+ // The first deployment fails.
+ appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{
+ AppName: "my_app",
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app",
+ },
+ }).Return(nil, errors.New("deployment in progress")).Once()
+
+ // After the first deployment fails, we should get the app and wait for the deployment to complete.
+ appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{
+ Name: "my_app",
+ }).Return(&apps.App{
+ Name: "my_app",
+ ActiveDeployment: &apps.AppDeployment{
+ DeploymentId: "active_deployment_id",
+ Status: &apps.AppDeploymentStatus{
+ State: apps.AppDeploymentStateInProgress,
+ },
+ },
+ }, nil).Once()
+
+ appApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil)
+
+ // The second one should succeed.
+ appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{
+ AppName: "my_app",
+ AppDeployment: &apps.AppDeployment{
+ Mode: apps.AppDeploymentModeSnapshot,
+ SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app",
+ },
+ }).Return(wait, nil).Once()
+
+ r := &testAppRunner{
+ m: mwc,
+ b: b,
+ ctx: ctx,
+ }
+ r.run(t)
+}
+
+func TestStopApp(t *testing.T) {
+ ctx, b, mwc := setupBundle(t)
+ appsApi := mwc.GetMockAppsAPI()
+ appsApi.EXPECT().Stop(mock.Anything, apps.StopAppRequest{
+ Name: "my_app",
+ }).Return(&apps.WaitGetAppStopped[apps.App]{
+ Poll: func(_ time.Duration, _ func(*apps.App)) (*apps.App, error) {
+ return &apps.App{
+ Name: "my_app",
+ AppStatus: &apps.ApplicationStatus{
+ State: apps.ApplicationStateUnavailable,
+ },
+ }, nil
+ },
+ }, nil)
+
+ r := appRunner{
+ key: "my_app",
+ bundle: b,
+ app: b.Config.Resources.Apps["my_app"],
+ }
+
+ err := r.Cancel(ctx)
+ require.NoError(t, err)
+}
diff --git a/bundle/run/job_test.go b/bundle/run/job_test.go
index 72aecc887..daf6cf063 100644
--- a/bundle/run/job_test.go
+++ b/bundle/run/job_test.go
@@ -1,7 +1,6 @@
 package run
 
 import (
- "bytes"
 "context"
 "testing"
 "time"
@@ -159,8 +158,8 @@ func TestJobRunnerRestart(t *testing.T) {
 m := mocks.NewMockWorkspaceClient(t)
 b.SetWorkpaceClient(m.WorkspaceClient)
- ctx := context.Background()
- ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", ""))
+
+ ctx := cmdio.MockDiscard(context.Background())
 ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend))
 
 jobApi := m.GetMockJobsAPI()
@@ -230,8 +229,8 @@ func TestJobRunnerRestartForContinuousUnpausedJobs(t *testing.T) {
 m := mocks.NewMockWorkspaceClient(t)
 b.SetWorkpaceClient(m.WorkspaceClient)
- ctx := context.Background()
- ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "..."))
+
+ ctx := cmdio.MockDiscard(context.Background())
 ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend))
 
 jobApi := m.GetMockJobsAPI()
diff --git a/bundle/run/pipeline_test.go b/bundle/run/pipeline_test.go
index
bfa0c5846..56d800d35 100644 --- a/bundle/run/pipeline_test.go +++ b/bundle/run/pipeline_test.go @@ -1,7 +1,6 @@ package run import ( - "bytes" "context" "testing" "time" @@ -75,8 +74,8 @@ func TestPipelineRunnerRestart(t *testing.T) { Host: "https://test.com", } b.SetWorkpaceClient(m.WorkspaceClient) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + + ctx := cmdio.MockDiscard(context.Background()) ctx = cmdio.NewContext(ctx, cmdio.NewLogger(flags.ModeAppend)) mockWait := &pipelines.WaitGetPipelineIdle[struct{}]{ diff --git a/bundle/run/runner.go b/bundle/run/runner.go index 4c907d068..23c2c0a41 100644 --- a/bundle/run/runner.go +++ b/bundle/run/runner.go @@ -42,7 +42,7 @@ type Runner interface { // IsRunnable returns a filter that only allows runnable resources. func IsRunnable(ref refs.Reference) bool { switch ref.Resource.(type) { - case *resources.Job, *resources.Pipeline: + case *resources.Job, *resources.Pipeline, *resources.App: return true default: return false @@ -56,6 +56,12 @@ func ToRunner(b *bundle.Bundle, ref refs.Reference) (Runner, error) { return &jobRunner{key: key(ref.KeyWithType), bundle: b, job: resource}, nil case *resources.Pipeline: return &pipelineRunner{key: key(ref.KeyWithType), bundle: b, pipeline: resource}, nil + case *resources.App: + return &appRunner{ + key: key(ref.KeyWithType), + bundle: b, + app: resource, + }, nil default: return nil, fmt.Errorf("unsupported resource type: %T", resource) } diff --git a/bundle/schema/embed_test.go b/bundle/schema/embed_test.go index 59f1458cb..03d2165e4 100644 --- a/bundle/schema/embed_test.go +++ b/bundle/schema/embed_test.go @@ -59,8 +59,8 @@ func TestJsonSchema(t *testing.T) { } providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider") - assert.Contains(t, providers.Enum, "gitHub") - assert.Contains(t, providers.Enum, "bitbucketCloud") - assert.Contains(t, providers.Enum, "gitHubEnterprise") - assert.Contains(t, providers.Enum, "bitbucketServer") + assert.Contains(t, providers.OneOf[0].Enum, "gitHub") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketCloud") + assert.Contains(t, providers.OneOf[0].Enum, "gitHubEnterprise") + assert.Contains(t, providers.OneOf[0].Enum, "bitbucketServer") } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 2f78ffcca..4a3b56814 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -59,6 +59,81 @@ "cli": { "bundle": { "config": { + "resources.App": { + "oneOf": [ + { + "type": "object", + "properties": { + "active_deployment": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeployment" + }, + "app_status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus" + }, + "compute_status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus" + }, + "config": { + "$ref": "#/$defs/map/interface" + }, + "create_time": { + "$ref": "#/$defs/string" + }, + "creator": { + "$ref": "#/$defs/string" + }, + "default_source_code_path": { + "$ref": "#/$defs/string" + }, + "description": { + "$ref": "#/$defs/string" + }, + "name": { + "$ref": "#/$defs/string" + }, + "pending_deployment": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeployment" + }, + "permissions": { + "$ref": 
"#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission" + }, + "resources": { + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/apps.AppResource" + }, + "service_principal_client_id": { + "$ref": "#/$defs/string" + }, + "service_principal_id": { + "$ref": "#/$defs/int64" + }, + "service_principal_name": { + "$ref": "#/$defs/string" + }, + "source_code_path": { + "$ref": "#/$defs/string" + }, + "update_time": { + "$ref": "#/$defs/string" + }, + "updater": { + "$ref": "#/$defs/string" + }, + "url": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "source_code_path", + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.Cluster": { "oneOf": [ { @@ -313,7 +388,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.\nIf more than 100 job clusters are available, you can paginate through them using :method:jobs/get.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" }, "max_concurrent_runs": { @@ -351,7 +426,7 @@ "$ref": "#/$defs/map/string" }, "tasks": { - "description": "A list of task specifications to be executed by this job.", + "description": "A list of task specifications to be executed by this job.\nIf more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" }, "timeout_seconds": { @@ -1051,6 +1126,7 @@ "$ref": "#/$defs/string" }, "uuid": { + "description": "Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. 
This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command).", "$ref": "#/$defs/string" } }, @@ -1273,6 +1349,9 @@ { "type": "object", "properties": { + "apps": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.App" + }, "clusters": { "description": "The cluster definitions for the bundle.", "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config/resources.Cluster", @@ -1528,6 +1607,391 @@ }, "databricks-sdk-go": { "service": { + "apps.AppDeployment": { + "oneOf": [ + { + "type": "object", + "properties": { + "create_time": { + "$ref": "#/$defs/string" + }, + "creator": { + "$ref": "#/$defs/string" + }, + "deployment_artifacts": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts" + }, + "deployment_id": { + "$ref": "#/$defs/string" + }, + "mode": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode" + }, + "source_code_path": { + "$ref": "#/$defs/string" + }, + "status": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus" + }, + "update_time": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentArtifacts": { + "oneOf": [ + { + "type": "object", + "properties": { + "source_code_path": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentMode": { + "oneOf": [ + { + "type": "string", + "enum": [ + "SNAPSHOT", + "AUTO_SYNC" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "SUCCEEDED", + "FAILED", + "IN_PROGRESS", + "CANCELLED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppDeploymentStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResource": { + "oneOf": [ + { + "type": "object", + "properties": { + "description": { + "$ref": "#/$defs/string" + }, + "job": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob" + }, + "name": { + "$ref": "#/$defs/string" + }, + "secret": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret" + }, + "serving_endpoint": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint" + }, + "sql_warehouse": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse" + } + }, + "additionalProperties": false, + "required": [ + "name" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceJob": { + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + }, + "permission": { + 
"$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission" + } + }, + "additionalProperties": false, + "required": [ + "id", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceJobJobPermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "IS_OWNER", + "CAN_MANAGE_RUN", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSecret": { + "oneOf": [ + { + "type": "object", + "properties": { + "key": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission" + }, + "scope": { + "$ref": "#/$defs/string" + } + }, + "additionalProperties": false, + "required": [ + "key", + "permission", + "scope" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSecretSecretPermission": { + "oneOf": [ + { + "type": "string", + "description": "Permission to grant on the secret scope. Supported permissions are: \"READ\", \"WRITE\", \"MANAGE\".", + "enum": [ + "READ", + "WRITE", + "MANAGE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceServingEndpoint": { + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission" + } + }, + "additionalProperties": false, + "required": [ + "name", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceServingEndpointServingEndpointPermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_QUERY", + "CAN_VIEW" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSqlWarehouse": { + "oneOf": [ + { + "type": "object", + "properties": { + "id": { + "$ref": "#/$defs/string" + }, + "permission": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission" + } + }, + "additionalProperties": false, + "required": [ + "id", + "permission" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.AppResourceSqlWarehouseSqlWarehousePermission": { + "oneOf": [ + { + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_USE", + "IS_OWNER" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ApplicationState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "DEPLOYING", + "RUNNING", + "CRASHED", + "UNAVAILABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ApplicationStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ApplicationState" + } + }, + "additionalProperties": false + }, + { + 
"type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ComputeState": { + "oneOf": [ + { + "type": "string", + "enum": [ + "ERROR", + "DELETING", + "STARTING", + "STOPPING", + "UPDATING", + "STOPPED", + "ACTIVE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, + "apps.ComputeStatus": { + "oneOf": [ + { + "type": "object", + "properties": { + "message": { + "$ref": "#/$defs/string" + }, + "state": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.ComputeState" + } + }, + "additionalProperties": false + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "catalog.MonitorCronSchedule": { "oneOf": [ { @@ -1559,11 +2023,19 @@ ] }, "catalog.MonitorCronSchedulePauseStatus": { - "type": "string", - "description": "Read only field that indicates whether a schedule is paused or not.", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "description": "Read only field that indicates whether a schedule is paused or not.", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorDataClassificationConfig": { @@ -1652,11 +2124,19 @@ ] }, "catalog.MonitorInferenceLogProblemType": { - "type": "string", - "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", - "enum": [ - "PROBLEM_TYPE_CLASSIFICATION", - "PROBLEM_TYPE_REGRESSION" + "oneOf": [ + { + "type": "string", + "description": "Problem type the model aims to solve. Determines the type of model-quality metrics that will be computed.", + "enum": [ + "PROBLEM_TYPE_CLASSIFICATION", + "PROBLEM_TYPE_REGRESSION" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorMetric": { @@ -1701,12 +2181,20 @@ ] }, "catalog.MonitorMetricType": { - "type": "string", - "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics\n", - "enum": [ - "CUSTOM_METRIC_TYPE_AGGREGATE", - "CUSTOM_METRIC_TYPE_DERIVED", - "CUSTOM_METRIC_TYPE_DRIFT" + "oneOf": [ + { + "type": "string", + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: 
depend on previously computed aggregate or derived metrics\n", + "enum": [ + "CUSTOM_METRIC_TYPE_AGGREGATE", + "CUSTOM_METRIC_TYPE_DERIVED", + "CUSTOM_METRIC_TYPE_DRIFT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "catalog.MonitorNotifications": { @@ -1770,10 +2258,18 @@ ] }, "catalog.VolumeType": { - "type": "string", - "enum": [ - "EXTERNAL", - "MANAGED" + "oneOf": [ + { + "type": "string", + "enum": [ + "EXTERNAL", + "MANAGED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Adlsgen2Info": { @@ -1872,12 +2368,20 @@ ] }, "compute.AwsAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", - "enum": [ - "SPOT", - "ON_DEMAND", - "SPOT_WITH_FALLBACK" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.\n", + "enum": [ + "SPOT", + "ON_DEMAND", + "SPOT_WITH_FALLBACK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.AzureAttributes": { @@ -1910,12 +2414,20 @@ ] }, "compute.AzureAvailability": { - "type": "string", - "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", - "enum": [ - "SPOT_AZURE", - "ON_DEMAND_AZURE", - "SPOT_WITH_FALLBACK_AZURE" + "oneOf": [ + { + "type": "string", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero (which only happens on pool clusters), this availability\ntype will be used for the entire cluster.", + "enum": [ + "SPOT_AZURE", + "ON_DEMAND_AZURE", + "SPOT_WITH_FALLBACK_AZURE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.ClientsTypes": { @@ -2092,19 +2604,27 @@ ] }, "compute.DataSecurityMode": { - "type": "string", - "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", - "enum": [ - "DATA_SECURITY_MODE_AUTO", - "DATA_SECURITY_MODE_STANDARD", - "DATA_SECURITY_MODE_DEDICATED", - "NONE", - "SINGLE_USER", - "USER_ISOLATION", - "LEGACY_TABLE_ACL", - "LEGACY_PASSTHROUGH", - "LEGACY_SINGLE_USER", - "LEGACY_SINGLE_USER_STANDARD" + "oneOf": [ + { + "type": "string", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used with `kind`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.\n", + "enum": [ + "DATA_SECURITY_MODE_AUTO", + "DATA_SECURITY_MODE_STANDARD", + "DATA_SECURITY_MODE_DEDICATED", + "NONE", + "SINGLE_USER", + "USER_ISOLATION", + "LEGACY_TABLE_ACL", + "LEGACY_PASSTHROUGH", + "LEGACY_SINGLE_USER", + "LEGACY_SINGLE_USER_STANDARD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.DbfsStorageInfo": { @@ -2172,11 +2692,19 @@ ] }, "compute.EbsVolumeType": { - "type": "string", - "description": "The type of EBS volumes that will be launched with this cluster.", - "enum": [ - "GENERAL_PURPOSE_SSD", - "THROUGHPUT_OPTIMIZED_HDD" + "oneOf": [ + { + "type": "string", + "description": "The type of EBS volumes that will be launched with this cluster.", + "enum": [ + "GENERAL_PURPOSE_SSD", + "THROUGHPUT_OPTIMIZED_HDD" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.Environment": { @@ -2243,12 +2771,20 @@ ] }, "compute.GcpAvailability": { - "type": "string", - "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", - "enum": [ - "PREEMPTIBLE_GCP", - "ON_DEMAND_GCP", - "PREEMPTIBLE_WITH_FALLBACK_GCP" + "oneOf": [ + { + "type": "string", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "enum": [ + "PREEMPTIBLE_GCP", + "ON_DEMAND_GCP", + "PREEMPTIBLE_WITH_FALLBACK_GCP" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "compute.GcsStorageInfo": { @@ -2482,12 +3018,20 @@ ] }, "compute.RuntimeEngine": { - "type": "string", - "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", - "enum": [ - "NULL", - "STANDARD", - "PHOTON" + "oneOf": [ + { + "type": "string", + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.\n", + "enum": [ + "NULL", + "STANDARD", + "PHOTON" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + 
} ] }, "compute.S3StorageInfo": { @@ -2599,10 +3143,18 @@ ] }, "dashboards.LifecycleState": { - "type": "string", - "enum": [ - "ACTIVE", - "TRASHED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ACTIVE", + "TRASHED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.CleanRoomsNotebookTask": { @@ -2640,10 +3192,18 @@ ] }, "jobs.Condition": { - "type": "string", - "enum": [ - "ANY_UPDATED", - "ALL_UPDATED" + "oneOf": [ + { + "type": "string", + "enum": [ + "ANY_UPDATED", + "ALL_UPDATED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.ConditionTask": { @@ -2678,15 +3238,23 @@ ] }, "jobs.ConditionTaskOp": { - "type": "string", - "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", - "enum": [ - "EQUAL_TO", - "GREATER_THAN", - "GREATER_THAN_OR_EQUAL", - "LESS_THAN", - "LESS_THAN_OR_EQUAL", - "NOT_EQUAL" + "oneOf": [ + { + "type": "string", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. 
If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "enum": [ + "EQUAL_TO", + "GREATER_THAN", + "GREATER_THAN_OR_EQUAL", + "LESS_THAN", + "LESS_THAN_OR_EQUAL", + "NOT_EQUAL" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.Continuous": { @@ -2842,23 +3410,39 @@ ] }, "jobs.Format": { - "type": "string", - "enum": [ - "SINGLE_TASK", - "MULTI_TASK" + "oneOf": [ + { + "type": "string", + "enum": [ + "SINGLE_TASK", + "MULTI_TASK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitProvider": { - "type": "string", - "enum": [ - "gitHub", - "bitbucketCloud", - "azureDevOpsServices", - "gitHubEnterprise", - "bitbucketServer", - "gitLab", - "gitLabEnterpriseEdition", - "awsCodeCommit" + "oneOf": [ + { + "type": "string", + "enum": [ + "gitHub", + "bitbucketCloud", + "azureDevOpsServices", + "gitHubEnterprise", + "bitbucketServer", + "gitLab", + "gitLabEnterpriseEdition", + "awsCodeCommit" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.GitSnapshot": { @@ -2971,18 +3555,34 @@ ] }, "jobs.JobDeploymentKind": { - "type": "string", - "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEditMode": { - "type": "string", - "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", - "enum": [ - "UI_LOCKED", - "EDITABLE" + "oneOf": [ + { + "type": "string", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "enum": [ + "UI_LOCKED", + "EDITABLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobEmailNotifications": { @@ -3150,29 +3750,53 @@ ] }, "jobs.JobSourceDirtyState": { - "type": "string", - "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", - "enum": [ - "NOT_SYNCED", - "DISCONNECTED" + "oneOf": [ + { + "type": "string", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. 
Import the remote job specification again from UI to make the job fully synced.", + "enum": [ + "NOT_SYNCED", + "DISCONNECTED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthMetric": { - "type": "string", - "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.", - "enum": [ - "RUN_DURATION_SECONDS", - "STREAMING_BACKLOG_BYTES", - "STREAMING_BACKLOG_RECORDS", - "STREAMING_BACKLOG_SECONDS", - "STREAMING_BACKLOG_FILES" + "oneOf": [ + { + "type": "string", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. 
This metric is in Public Preview.", + "enum": [ + "RUN_DURATION_SECONDS", + "STREAMING_BACKLOG_BYTES", + "STREAMING_BACKLOG_RECORDS", + "STREAMING_BACKLOG_SECONDS", + "STREAMING_BACKLOG_FILES" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthOperator": { - "type": "string", - "description": "Specifies the operator used to compare the health metric value with the specified threshold.", - "enum": [ - "GREATER_THAN" + "oneOf": [ + { + "type": "string", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "enum": [ + "GREATER_THAN" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.JobsHealthRule": { @@ -3256,10 +3880,18 @@ ] }, "jobs.PauseStatus": { - "type": "string", - "enum": [ - "UNPAUSED", - "PAUSED" + "oneOf": [ + { + "type": "string", + "enum": [ + "UNPAUSED", + "PAUSED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PeriodicTriggerConfiguration": { @@ -3289,11 +3921,19 @@ ] }, "jobs.PeriodicTriggerConfigurationTimeUnit": { - "type": "string", - "enum": [ - "HOURS", - "DAYS", - "WEEKS" + "oneOf": [ + { + "type": "string", + "enum": [ + "HOURS", + "DAYS", + "WEEKS" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.PipelineParams": { @@ -3395,15 +4035,23 @@ ] }, "jobs.RunIf": { - "type": "string", - "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", - "enum": [ - "ALL_SUCCESS", - "ALL_DONE", - "NONE_FAILED", - "AT_LEAST_ONE_SUCCESS", - "ALL_FAILED", - "AT_LEAST_ONE_FAILED" + "oneOf": [ + { + "type": "string", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed", + "enum": [ + "ALL_SUCCESS", + "ALL_DONE", + "NONE_FAILED", + "AT_LEAST_ONE_SUCCESS", + "ALL_FAILED", + "AT_LEAST_ONE_FAILED" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.RunJobTask": { @@ -3463,11 +4111,19 @@ ] }, "jobs.Source": { - "type": "string", - "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", - "enum": [ - "WORKSPACE", - "GIT" + "oneOf": [ + { + "type": "string", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\\\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "enum": [ + "WORKSPACE", + "GIT" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "jobs.SparkJarTask": { @@ -4167,12 +4823,20 @@ ] }, "ml.ModelVersionStatus": { - "type": "string", - "description": "Current status of `model_version`", - "enum": [ - "PENDING_REGISTRATION", - "FAILED_REGISTRATION", - "READY" + "oneOf": [ + { + "type": "string", + "description": "Current status of `model_version`", + "enum": [ + "PENDING_REGISTRATION", + "FAILED_REGISTRATION", + "READY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "ml.ModelVersionTag": { @@ -4217,11 +4881,40 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "string", + "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", + "enum": [ + "MONDAY", + "TUESDAY", + "WEDNESDAY", + "THURSDAY", + "FRIDAY", + "SATURDAY", + "SUNDAY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.DeploymentKind": { - "type": "string", - "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", - "enum": [ - "BUNDLE" + "oneOf": [ + { + "type": "string", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", + "enum": [ + "BUNDLE" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.FileLibrary": { @@ -4527,11 +5220,19 @@ ] }, "pipelines.PipelineClusterAutoscaleMode": { - "type": "string", - "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.\n", - "enum": [ - "ENHANCED", - "LEGACY" + "oneOf": [ + { + "type": "string", + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.\n", + "enum": [ + "ENHANCED", + "LEGACY" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "pipelines.PipelineDeployment": { @@ -4651,7 +5352,7 @@ "properties": { "days_of_week": { "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" }, "start_hour": { "description": "An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day.\nContinuous pipeline restart is triggered only within a five-hour window starting at this hour.", @@ -4673,19 +5374,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "type": "string", - "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "enum": [ - "MONDAY", - "TUESDAY", - "WEDNESDAY", - "THURSDAY", - "FRIDAY", - "SATURDAY", - "SUNDAY" - ] - }, "pipelines.SchemaSpec": { "oneOf": [ { @@ -4793,11 +5481,19 @@ ] }, "pipelines.TableSpecificConfigScdType": { - "type": "string", - "description": "The SCD type to use to ingest the table.", - "enum": [ - "SCD_TYPE_1", - "SCD_TYPE_2" + "oneOf": [ + { + "type": "string", + "description": "The SCD type to use to ingest the table.", + "enum": [ + "SCD_TYPE_1", + "SCD_TYPE_2" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Ai21LabsConfig": { @@ -4904,11 +5600,19 @@ ] }, "serving.AiGatewayGuardrailPiiBehaviorBehavior": { - "type": "string", - "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", - "enum": [ - "NONE", - "BLOCK" + "oneOf": [ + { + "type": "string", + "description": "Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.", + "enum": [ + "NONE", + "BLOCK" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayGuardrails": { @@ -4994,18 +5698,34 @@ ] }, "serving.AiGatewayRateLimitKey": { - "type": "string", - "description": "Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "description": "Key field for a rate limit. 
Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayRateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a rate limit. Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "description": "Renewal period field for a rate limit. Currently, only 'minute' is supported.", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AiGatewayUsageTrackingConfig": { @@ -5069,13 +5789,21 @@ ] }, "serving.AmazonBedrockConfigBedrockProvider": { - "type": "string", - "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", - "enum": [ - "anthropic", - "cohere", - "ai21labs", - "amazon" + "oneOf": [ + { + "type": "string", + "description": "The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "enum": [ + "anthropic", + "cohere", + "ai21labs", + "amazon" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.AnthropicConfig": { @@ -5304,17 +6032,25 @@ ] }, "serving.ExternalModelProvider": { - "type": "string", - "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", - "enum": [ - "ai21labs", - "anthropic", - "amazon-bedrock", - "cohere", - "databricks-model-serving", - "google-cloud-vertex-ai", - "openai", - "palm" + "oneOf": [ + { + "type": "string", + "description": "The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',\n'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.\",\n", + "enum": [ + "ai21labs", + "anthropic", + "amazon-bedrock", + "cohere", + "databricks-model-serving", + "google-cloud-vertex-ai", + "openai", + "palm" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.GoogleCloudVertexAiConfig": { @@ -5458,18 +6194,34 @@ ] }, "serving.RateLimitKey": { - "type": "string", - "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", - "enum": [ - "user", - "endpoint" + "oneOf": [ + { + "type": "string", + "description": "Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "enum": [ + "user", + "endpoint" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.RateLimitRenewalPeriod": { - "type": "string", - "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", - "enum": [ - "minute" + "oneOf": [ + { + "type": "string", + "description": "Renewal period field for a serving endpoint rate limit. 
Currently, only 'minute' is supported.", + "enum": [ + "minute" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.Route": { @@ -5616,23 +6368,39 @@ ] }, "serving.ServedModelInputWorkloadSize": { - "type": "string", - "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", - "enum": [ - "Small", - "Medium", - "Large" + "oneOf": [ + { + "type": "string", + "description": "The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.\n", + "enum": [ + "Small", + "Medium", + "Large" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.ServedModelInputWorkloadType": { - "type": "string", - "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", - "enum": [ - "CPU", - "GPU_SMALL", - "GPU_MEDIUM", - "GPU_LARGE", - "MULTIGPU_MEDIUM" + "oneOf": [ + { + "type": "string", + "description": "The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n\"CPU\". 
For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n", + "enum": [ + "CPU", + "GPU_SMALL", + "GPU_MEDIUM", + "GPU_LARGE", + "MULTIGPU_MEDIUM" + ] + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } ] }, "serving.TrafficConfig": { @@ -5718,6 +6486,20 @@ "cli": { "bundle": { "config": { + "resources.App": { + "oneOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/github.com/databricks/cli/bundle/config/resources.App" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "resources.Cluster": { "oneOf": [ { @@ -5947,6 +6729,20 @@ } } }, + "interface": { + "oneOf": [ + { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/interface" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "string": { "oneOf": [ { @@ -6015,6 +6811,20 @@ }, "databricks-sdk-go": { "service": { + "apps.AppResource": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/apps.AppResource" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "catalog.MonitorMetric": { "oneOf": [ { @@ -6225,6 +7035,20 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.IngestionConfig": { "oneOf": [ { @@ -6281,20 +7105,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "oneOf": [ - { - "type": "array", - "items": { - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" - } - }, - { - "type": "string", - "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" - } - ] - }, "serving.AiGatewayRateLimit": { "oneOf": [ { @@ -6459,5 +7269,5 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Workspace" } }, - "additionalProperties": false + "additionalProperties": {} } \ No newline at end of file diff --git a/bundle/tests/apps/databricks.yml b/bundle/tests/apps/databricks.yml new file mode 100644 index 000000000..ad7e93006 --- /dev/null +++ b/bundle/tests/apps/databricks.yml @@ -0,0 +1,71 @@ +bundle: + name: apps + +workspace: + host: https://acme.cloud.databricks.com/ + +variables: + app_config: + type: complex + default: + command: + - "python" + - "app.py" + env: + - name: SOME_ENV_VARIABLE + value: "Some value" + +resources: + apps: + my_app: + name: "my-app" + description: "My App" + source_code_path: ./app + config: ${var.app_config} + + resources: + - name: "my-sql-warehouse" + sql_warehouse: + id: 1234 + permission: "CAN_USE" + - name: "my-job" + job: + id: 5678 + permission: "CAN_MANAGE_RUN" + permissions: + - user_name: "foo@bar.com" + level: "CAN_VIEW" + - service_principal_name: "my_sp" + level: "CAN_MANAGE" + + +targets: + default: + + development: + variables: + app_config: + command: + - "python" + - "dev.py" + env: + - name: SOME_ENV_VARIABLE_2 + value: "Some value 2" + resources: + apps: + my_app: + 
source_code_path: ./app-dev + resources: + - name: "my-sql-warehouse" + sql_warehouse: + id: 1234 + permission: "CAN_MANAGE" + - name: "my-job" + job: + id: 5678 + permission: "CAN_MANAGE" + - name: "my-secret" + secret: + key: "key" + scope: "scope" + permission: "CAN_USE" diff --git a/bundle/tests/apps_test.go b/bundle/tests/apps_test.go new file mode 100644 index 000000000..7fee60d14 --- /dev/null +++ b/bundle/tests/apps_test.go @@ -0,0 +1,60 @@ +package config_tests + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/stretchr/testify/assert" +) + +func TestApps(t *testing.T) { + b := load(t, "./apps") + assert.Equal(t, "apps", b.Config.Bundle.Name) + + diags := bundle.Apply(context.Background(), b, + bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + )) + assert.Empty(t, diags) + + app := b.Config.Resources.Apps["my_app"] + assert.Equal(t, "my-app", app.Name) + assert.Equal(t, "My App", app.Description) + assert.Equal(t, []any{"python", "app.py"}, app.Config["command"]) + assert.Equal(t, []any{map[string]any{"name": "SOME_ENV_VARIABLE", "value": "Some value"}}, app.Config["env"]) + + assert.Len(t, app.Resources, 2) + assert.Equal(t, "1234", app.Resources[0].SqlWarehouse.Id) + assert.Equal(t, "CAN_USE", string(app.Resources[0].SqlWarehouse.Permission)) + assert.Equal(t, "5678", app.Resources[1].Job.Id) + assert.Equal(t, "CAN_MANAGE_RUN", string(app.Resources[1].Job.Permission)) +} + +func TestAppsOverride(t *testing.T) { + b := loadTarget(t, "./apps", "development") + assert.Equal(t, "apps", b.Config.Bundle.Name) + + diags := bundle.Apply(context.Background(), b, + bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferences("variables"), + )) + assert.Empty(t, diags) + app := b.Config.Resources.Apps["my_app"] + assert.Equal(t, "my-app", app.Name) + assert.Equal(t, "My App", app.Description) + assert.Equal(t, []any{"python", "dev.py"}, app.Config["command"]) + assert.Equal(t, []any{map[string]any{"name": "SOME_ENV_VARIABLE_2", "value": "Some value 2"}}, app.Config["env"]) + + assert.Len(t, app.Resources, 3) + assert.Equal(t, "1234", app.Resources[0].SqlWarehouse.Id) + assert.Equal(t, "CAN_MANAGE", string(app.Resources[0].SqlWarehouse.Permission)) + assert.Equal(t, "5678", app.Resources[1].Job.Id) + assert.Equal(t, "CAN_MANAGE", string(app.Resources[1].Job.Permission)) + assert.Equal(t, "key", app.Resources[2].Secret.Key) + assert.Equal(t, "scope", app.Resources[2].Secret.Scope) + assert.Equal(t, "CAN_USE", string(app.Resources[2].Secret.Permission)) +} diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index bb68b3059..9b246b7cc 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -47,6 +47,7 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { mutator.MergeJobParameters(), mutator.MergeJobTasks(), mutator.MergePipelineClusters(), + mutator.MergeApps(), )) return b, diags } diff --git a/bundle/tests/path_translation_test.go b/bundle/tests/path_translation_test.go deleted file mode 100644 index 05702d2a2..000000000 --- a/bundle/tests/path_translation_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPathTranslationFallback(t *testing.T) { - b := 
loadTarget(t, "./path_translation/fallback", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - require.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 6) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationFallbackError(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} - -func TestPathTranslationNominal(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 8) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", 
filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - assert.Equal(t, "for_each_notebook_example", filepath.ToSlash(j.Tasks[6].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[6].ForEachTask.Task.NotebookTask.NotebookPath)) - - assert.Equal(t, "for_each_spark_python_example", filepath.ToSlash(j.Tasks[7].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[7].ForEachTask.Task.SparkPythonTask.PythonFile)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationNominalError(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} diff --git a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + 
print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/quality_monitor_test.go b/bundle/tests/quality_monitor_test.go deleted file mode 100644 index e95c7b7c1..000000000 --- a/bundle/tests/quality_monitor_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/databricks/cli/bundle/config" - "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/databricks-sdk-go/service/catalog" - "github.com/stretchr/testify/assert" -) - -func assertExpectedMonitor(t *testing.T, p *resources.QualityMonitor) { - assert.Equal(t, "timestamp", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} - -func TestMonitorTableNames(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "development") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - assert.Equal(t, config.Development, b.Config.Bundle.Mode) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.dev", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.dev", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorStaging(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "staging") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.staging", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.staging", p.OutputSchemaName) - - assertExpectedMonitor(t, p) -} - -func TestMonitorProduction(t *testing.T) { - b := loadTarget(t, "./quality_monitor", "production") - assert.Len(t, b.Config.Resources.QualityMonitors, 1) - - p := b.Config.Resources.QualityMonitors["my_monitor"] - assert.Equal(t, "main.test.prod", p.TableName) - assert.Equal(t, "/Shared/provider-test/databricks_monitoring/main.test.thing1", p.AssetsDir) - assert.Equal(t, "main.prod", p.OutputSchemaName) - - inferenceLog := p.InferenceLog - assert.Equal(t, []string{"1 day", "1 hour"}, inferenceLog.Granularities) - assert.Equal(t, "timestamp_prod", p.InferenceLog.TimestampCol) - assert.Equal(t, "prediction_prod", p.InferenceLog.PredictionCol) - assert.Equal(t, "model_id_prod", p.InferenceLog.ModelIdCol) - assert.Equal(t, catalog.MonitorInferenceLogProblemType("PROBLEM_TYPE_REGRESSION"), p.InferenceLog.ProblemType) -} diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go deleted file mode 
100644 index 0f553ac3d..000000000 --- a/bundle/tests/relative_path_translation_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestRelativePathTranslationDefault(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "default") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile) -} - -func TestRelativePathTranslationOverride(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "override") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile) -} diff --git a/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep b/bundle/tests/relative_path_with_includes/artifact_a/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep b/bundle/tests/relative_path_with_includes/subfolder/artifact_b/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/bundle/tests/relative_path_with_includes_test.go b/bundle/tests/relative_path_with_includes_test.go index 6e13628be..7249cac1f 100644 --- a/bundle/tests/relative_path_with_includes_test.go +++ b/bundle/tests/relative_path_with_includes_test.go @@ -17,8 +17,8 @@ func TestRelativePathsWithIncludes(t *testing.T) { diags := bundle.Apply(context.Background(), b, m) assert.NoError(t, diags.Error()) - assert.Equal(t, "artifact_a", b.Config.Artifacts["test_a"].Path) - assert.Equal(t, filepath.Join("subfolder", "artifact_b"), b.Config.Artifacts["test_b"].Path) + assert.Equal(t, b.SyncRootPath+"/artifact_a", b.Config.Artifacts["test_a"].Path) + assert.Equal(t, b.SyncRootPath+"/subfolder/artifact_b", b.Config.Artifacts["test_b"].Path) assert.ElementsMatch( t, @@ -37,6 +37,6 @@ func TestRelativePathsWithIncludes(t *testing.T) { b.Config.Sync.Exclude, ) - assert.Equal(t, filepath.Join("dist", "job_a.whl"), b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) - assert.Equal(t, filepath.Join("subfolder", "dist", "job_b.whl"), b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "dist/job_a.whl", b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "subfolder/dist/job_b.whl", b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) } diff --git a/cmd/account/federation-policy/federation-policy.go b/cmd/account/federation-policy/federation-policy.go index d78ac709a..e47bf8324 100755 --- a/cmd/account/federation-policy/federation-policy.go +++ b/cmd/account/federation-policy/federation-policy.go @@ -110,8 +110,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - 
cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create" @@ -180,7 +181,10 @@ func newDelete() *cobra.Command { cmd.Use = "delete POLICY_ID" cmd.Short = `Delete account federation policy.` - cmd.Long = `Delete account federation policy.` + cmd.Long = `Delete account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -233,7 +237,10 @@ func newGet() *cobra.Command { cmd.Use = "get POLICY_ID" cmd.Short = `Get account federation policy.` - cmd.Long = `Get account federation policy.` + cmd.Long = `Get account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -338,25 +345,22 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update POLICY_ID UPDATE_MASK" + cmd.Use = "update POLICY_ID" cmd.Short = `Update account federation policy.` cmd.Long = `Update account federation policy. Arguments: - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. 
To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(2) + check := root.ExactArgs(1) return check(cmd, args) } @@ -378,7 +382,6 @@ func newUpdate() *cobra.Command { } } updateReq.PolicyId = args[0] - updateReq.UpdateMask = args[1] response, err := a.FederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go index 77f73bcd0..df36de239 100755 --- a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go +++ b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go @@ -117,8 +117,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create SERVICE_PRINCIPAL_ID" @@ -198,7 +199,7 @@ func newDelete() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -259,7 +260,7 @@ func newGet() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -376,26 +377,23 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID UPDATE_MASK" + cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID" cmd.Short = `Update service principal federation policy.` cmd.Long = `Update service principal federation policy. Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. 
To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(3) + check := root.ExactArgs(2) return check(cmd, args) } @@ -421,7 +419,6 @@ func newUpdate() *cobra.Command { return fmt.Errorf("invalid SERVICE_PRINCIPAL_ID: %s", args[0]) } updateReq.PolicyId = args[1] - updateReq.UpdateMask = args[2] response, err := a.ServicePrincipalFederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/bundle/generate.go b/cmd/bundle/generate.go index 7dea19ff9..d09c6feb4 100644 --- a/cmd/bundle/generate.go +++ b/cmd/bundle/generate.go @@ -17,6 +17,7 @@ func newGenerateCommand() *cobra.Command { cmd.AddCommand(generate.NewGenerateJobCommand()) cmd.AddCommand(generate.NewGeneratePipelineCommand()) cmd.AddCommand(generate.NewGenerateDashboardCommand()) + cmd.AddCommand(generate.NewGenerateAppCommand()) cmd.PersistentFlags().StringVar(&key, "key", "", `resource key to use for the generated configuration`) return cmd } diff --git a/cmd/bundle/generate/app.go b/cmd/bundle/generate/app.go new file mode 100644 index 000000000..9dbd4fe46 --- /dev/null +++ b/cmd/bundle/generate/app.go @@ -0,0 +1,166 @@ +package generate + +import ( + "context" + "errors" + "fmt" + "io" + "io/fs" + "path/filepath" + + "github.com/databricks/cli/bundle/config/generate" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/yamlsaver" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/textutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" + + "gopkg.in/yaml.v3" +) + +func NewGenerateAppCommand() *cobra.Command { + var configDir string + var sourceDir string + var appName string + var force bool + + cmd := &cobra.Command{ + Use: "app", + Short: "Generate bundle configuration for a Databricks app", + } + + cmd.Flags().StringVar(&appName, "existing-app-name", "", `App name to generate config for`) + cmd.MarkFlagRequired("existing-app-name") + + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Directory path where the output bundle config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src/app", `Directory path where the app files will be stored`) + cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + b, diags := root.MustConfigureBundle(cmd) + if err := diags.Error(); err != nil { + return diags.Error() + } + + w := b.WorkspaceClient() + cmdio.LogString(ctx, fmt.Sprintf("Loading app '%s' configuration", appName)) + app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: appName}) + if err != nil { + return err + } + + // Making sure the config directory and source directory are absolute paths. 
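+		// Relative values (including the defaults "resources" and "src/app")
+		// are resolved against the bundle root, not the current working directory.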
+ if !filepath.IsAbs(configDir) { + configDir = filepath.Join(b.BundleRootPath, configDir) + } + + if !filepath.IsAbs(sourceDir) { + sourceDir = filepath.Join(b.BundleRootPath, sourceDir) + } + + downloader := newDownloader(w, sourceDir, configDir) + + sourceCodePath := app.DefaultSourceCodePath + err = downloader.markDirectoryForDownload(ctx, &sourceCodePath) + if err != nil { + return err + } + + appConfig, err := getAppConfig(ctx, app, w) + if err != nil { + return fmt.Errorf("failed to get app config: %w", err) + } + + // Making sure the source code path is relative to the config directory. + rel, err := filepath.Rel(configDir, sourceDir) + if err != nil { + return err + } + + v, err := generate.ConvertAppToValue(app, filepath.ToSlash(rel), appConfig) + if err != nil { + return err + } + + appKey := cmd.Flag("key").Value.String() + if appKey == "" { + appKey = textutil.NormalizeString(app.Name) + } + + result := map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "apps": dyn.V(map[string]dyn.Value{ + appKey: v, + }), + }), + } + + // If there are app.yaml or app.yml files in the source code path, they will be downloaded but we don't want to include them in the bundle. + // We include this configuration inline, so we need to remove these files. + for _, configFile := range []string{"app.yml", "app.yaml"} { + delete(downloader.files, filepath.Join(sourceDir, configFile)) + } + + err = downloader.FlushToDisk(ctx, force) + if err != nil { + return err + } + + filename := filepath.Join(configDir, appKey+".app.yml") + + saver := yamlsaver.NewSaver() + err = saver.SaveAsYAML(result, filename, force) + if err != nil { + return err + } + + cmdio.LogString(ctx, "App configuration successfully saved to "+filename) + return nil + } + + return cmd +} + +func getAppConfig(ctx context.Context, app *apps.App, w *databricks.WorkspaceClient) (map[string]any, error) { + sourceCodePath := app.DefaultSourceCodePath + + f, err := filer.NewWorkspaceFilesClient(w, sourceCodePath) + if err != nil { + return nil, err + } + + // The app config is stored in app.yml or app.yaml file in the source code path. + configFileNames := []string{"app.yml", "app.yaml"} + for _, configFile := range configFileNames { + r, err := f.Read(ctx, configFile) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + continue + } + return nil, err + } + defer r.Close() + + cmdio.LogString(ctx, "Reading app configuration from "+configFile) + content, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + var appConfig map[string]any + err = yaml.Unmarshal(content, &appConfig) + if err != nil { + cmdio.LogString(ctx, fmt.Sprintf("Failed to parse app configuration:\n%s\nerr: %v", string(content), err)) + return nil, nil + } + + return appConfig, nil + } + + return nil, nil +} diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index fa3c91b2a..d56d246c2 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -441,8 +441,8 @@ func NewGenerateDashboardCommand() *cobra.Command { cmd.Flags().MarkHidden("existing-dashboard-id") // Output flags. 
- cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "./resources", `directory to write the configuration to`) - cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "./src", `directory to write the dashboard representation to`) + cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "resources", `directory to write the configuration to`) + cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "src", `directory to write the dashboard representation to`) cmd.Flags().BoolVarP(&d.force, "force", "f", false, `force overwrite existing files in the output directory`) // Exactly one of the lookup flags must be provided. diff --git a/cmd/bundle/generate/job.go b/cmd/bundle/generate/job.go index 827d270e5..d97891cd5 100644 --- a/cmd/bundle/generate/job.go +++ b/cmd/bundle/generate/job.go @@ -32,13 +32,8 @@ func NewGenerateJobCommand() *cobra.Command { cmd.Flags().Int64Var(&jobId, "existing-job-id", 0, `Job ID of the job to generate config for`) cmd.MarkFlagRequired("existing-job-id") - wd, err := os.Getwd() - if err != nil { - wd = "." - } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 863b0b2f7..1d2c345d6 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -32,13 +32,8 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd.Flags().StringVar(&pipelineId, "existing-pipeline-id", "", `ID of the pipeline to generate config for`) cmd.MarkFlagRequired("existing-pipeline-id") - wd, err := os.Getwd() - if err != nil { - wd = "." 
- } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index dbfad9438..c2c9bbb55 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/databricks/databricks-sdk-go/service/workspace" "golang.org/x/sync/errgroup" ) @@ -63,6 +64,37 @@ func (n *downloader) markFileForDownload(ctx context.Context, filePath *string) return nil } +func (n *downloader) markDirectoryForDownload(ctx context.Context, dirPath *string) error { + _, err := n.w.Workspace.GetStatusByPath(ctx, *dirPath) + if err != nil { + return err + } + + objects, err := n.w.Workspace.RecursiveList(ctx, *dirPath) + if err != nil { + return err + } + + for _, obj := range objects { + if obj.ObjectType == workspace.ObjectTypeDirectory { + continue + } + + err := n.markFileForDownload(ctx, &obj.Path) + if err != nil { + return err + } + } + + rel, err := filepath.Rel(n.configDir, n.sourceDir) + if err != nil { + return err + } + + *dirPath = rel + return nil +} + func (n *downloader) markNotebookForDownload(ctx context.Context, notebookPath *string) error { info, err := n.w.Workspace.GetStatusByPath(ctx, *notebookPath) if err != nil { @@ -106,9 +138,7 @@ func (n *downloader) FlushToDisk(ctx context.Context, force bool) error { } errs, errCtx := errgroup.WithContext(ctx) - for k, v := range n.files { - targetPath := k - filePath := v + for targetPath, filePath := range n.files { errs.Go(func() error { reader, err := n.w.Workspace.Download(errCtx, filePath) if err != nil { diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..1911abe19 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -1,175 +1,15 @@ package bundle import ( - "context" "errors" "fmt" - "io/fs" - "os" - "path/filepath" - "slices" - "strings" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/dbr" - "github.com/databricks/cli/libs/filer" - "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/template" "github.com/spf13/cobra" ) -var gitUrlPrefixes = []string{ - "https://", - "git@", -} - -type nativeTemplate struct { - name string - gitUrl string - description string - aliases []string - hidden bool -} - -const customTemplate = "custom..." 
- -var nativeTemplates = []nativeTemplate{ - { - name: "default-python", - description: "The default Python template for Notebooks / Delta Live Tables / Workflows", - }, - { - name: "default-sql", - description: "The default SQL template for .sql files that run with Databricks SQL", - }, - { - name: "dbt-sql", - description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - }, - { - name: "mlops-stacks", - gitUrl: "https://github.com/databricks/mlops-stacks", - description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", - aliases: []string{"mlops-stack"}, - }, - { - name: "default-pydabs", - gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", - hidden: true, - description: "The default PyDABs template", - }, - { - name: customTemplate, - description: "Bring your own template", - }, -} - -// Return template descriptions for command-line help -func nativeTemplateHelpDescriptions() string { - var lines []string - for _, template := range nativeTemplates { - if template.name != customTemplate && !template.hidden { - lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) - } - } - return strings.Join(lines, "\n") -} - -// Return template options for an interactive prompt -func nativeTemplateOptions() []cmdio.Tuple { - names := make([]cmdio.Tuple, 0, len(nativeTemplates)) - for _, template := range nativeTemplates { - if template.hidden { - continue - } - tuple := cmdio.Tuple{ - Name: template.name, - Id: template.description, - } - names = append(names, tuple) - } - return names -} - -func getNativeTemplateByDescription(description string) string { - for _, template := range nativeTemplates { - if template.description == description { - return template.name - } - } - return "" -} - -func getUrlForNativeTemplate(name string) string { - for _, template := range nativeTemplates { - if template.name == name { - return template.gitUrl - } - if slices.Contains(template.aliases, name) { - return template.gitUrl - } - } - return "" -} - -func getFsForNativeTemplate(name string) (fs.FS, error) { - builtin, err := template.Builtin() - if err != nil { - return nil, err - } - - // If this is a built-in template, the return value will be non-nil. - var templateFS fs.FS - for _, entry := range builtin { - if entry.Name == name { - templateFS = entry.FS - break - } - } - - return templateFS, nil -} - -func isRepoUrl(url string) bool { - result := false - for _, prefix := range gitUrlPrefixes { - if strings.HasPrefix(url, prefix) { - result = true - break - } - } - return result -} - -// Computes the repo name from the repo URL. Treats the last non empty word -// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git -// the name would be "cli.git" -func repoName(url string) string { - parts := strings.Split(strings.TrimRight(url, "/"), "/") - return parts[len(parts)-1] -} - -func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { - outputDir, err := filepath.Abs(outputDir) - if err != nil { - return nil, err - } - - // If the CLI is running on DBR and we're writing to the workspace file system, - // use the extension-aware workspace filesystem filer to instantiate the template. - // - // It is not possible to write notebooks through the workspace filesystem's FUSE mount. 
- // Therefore this is the only way we can initialize templates that contain notebooks - // when running the CLI on DBR and initializing a template to the workspace. - // - if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { - return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) - } - - return filer.NewLocalClient(outputDir) -} - func newInitCommand() *cobra.Command { cmd := &cobra.Command{ Use: "init [TEMPLATE_PATH]", @@ -182,7 +22,7 @@ TEMPLATE_PATH optionally specifies which template to use. It can be one of the f - a local file system path with a template directory - a Git repository URL, e.g. https://github.com/my/repository -See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, nativeTemplateHelpDescriptions()), +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.`, template.HelpDescriptions()), } var configFile string @@ -202,88 +42,32 @@ See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more inf return errors.New("only one of --tag or --branch can be specified") } - // Git ref to use for template initialization - ref := branch - if tag != "" { - ref = tag + var templatePathOrUrl string + if len(args) > 0 { + templatePathOrUrl = args[0] + } + r := template.Resolver{ + TemplatePathOrUrl: templatePathOrUrl, + ConfigFile: configFile, + OutputDir: outputDir, + TemplateDir: templateDir, + Tag: tag, + Branch: branch, } ctx := cmd.Context() - var templatePath string - if len(args) > 0 { - templatePath = args[0] - } else { - var err error - if !cmdio.IsPromptSupported(ctx) { - return errors.New("please specify a template") - } - description, err := cmdio.SelectOrdered(ctx, nativeTemplateOptions(), "Template to use") - if err != nil { - return err - } - templatePath = getNativeTemplateByDescription(description) - } - - outputFiler, err := constructOutputFiler(ctx, outputDir) - if err != nil { - return err - } - - if templatePath == customTemplate { + tmpl, err := r.Resolve(ctx) + if errors.Is(err, template.ErrCustomSelected) { cmdio.LogString(ctx, "Please specify a path or Git repository to use a custom template.") cmdio.LogString(ctx, "See https://docs.databricks.com/en/dev-tools/bundles/templates.html to learn more about custom templates.") return nil } - - // Expand templatePath to a git URL if it's an alias for a known native template - // and we know it's git URL. - if gitUrl := getUrlForNativeTemplate(templatePath); gitUrl != "" { - templatePath = gitUrl - } - - if !isRepoUrl(templatePath) { - if templateDir != "" { - return errors.New("--template-dir can only be used with a Git repository URL") - } - - templateFS, err := getFsForNativeTemplate(templatePath) - if err != nil { - return err - } - - // If this is not a built-in template, then it must be a local file system path. - if templateFS == nil { - templateFS = os.DirFS(templatePath) - } - - // skip downloading the repo because input arg is not a URL. We assume - // it's a path on the local file system in that case - return template.Materialize(ctx, configFile, templateFS, outputFiler) - } - - // Create a temporary directory with the name of the repository. The '*' - // character is replaced by a random string in the generated temporary directory. 
- repoDir, err := os.MkdirTemp("", repoName(templatePath)+"-*") if err != nil { return err } + defer tmpl.Reader.Cleanup(ctx) - // start the spinner - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "Downloading the template\n" - - // TODO: Add automated test that the downloaded git repo is cleaned up. - // Clone the repository in the temporary directory - err = git.Clone(ctx, templatePath, ref, repoDir) - close(promptSpinner) - if err != nil { - return err - } - - // Clean up downloaded repository once the template is materialized. - defer os.RemoveAll(repoDir) - templateFS := os.DirFS(filepath.Join(repoDir, templateDir)) - return template.Materialize(ctx, configFile, templateFS, outputFiler) + return tmpl.Writer.Materialize(ctx, tmpl.Reader) } return cmd } diff --git a/cmd/cmd.go b/cmd/cmd.go index 5b53a4ae5..5d835409f 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -15,7 +15,6 @@ import ( "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/workspace" - "github.com/databricks/cli/cmd/workspace/apps" "github.com/spf13/cobra" ) @@ -68,7 +67,6 @@ func New(ctx context.Context) *cobra.Command { // Add other subcommands. cli.AddCommand(api.New()) - cli.AddCommand(apps.New()) cli.AddCommand(auth.New()) cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py index 6873257d5..a162da342 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -1 +1 @@ -print(f'setting up important infrastructure') +print(f"setting up important infrastructure") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py index 769ee73ee..e5866d6ae 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -2,26 +2,34 @@ import os, sys, json payload = json.loads(sys.argv[1]) -if 'echo' == payload['command']: - json.dump({ - 'command': payload['command'], - 'flags': payload['flags'], - 'env': {k:v for k,v in os.environ.items()} - }, sys.stdout) +if "echo" == payload["command"]: + json.dump( + { + "command": payload["command"], + "flags": payload["flags"], + "env": {k: v for k, v in os.environ.items()}, + }, + sys.stdout, + ) sys.exit(0) -if 'table' == payload['command']: +if "table" == payload["command"]: sys.stderr.write("some intermediate info\n") - json.dump({'records': [ - {'key': 'First', 'value': 'Second'}, - {'key': 'Third', 'value': 'Fourth'}, - ]}, sys.stdout) + json.dump( + { + "records": [ + {"key": "First", "value": "Second"}, + {"key": "Third", "value": "Fourth"}, + ] + }, + sys.stdout, + ) sys.exit(0) -print(f'host is {os.environ["DATABRICKS_HOST"]}') +print(f"host is {os.environ['DATABRICKS_HOST']}") -print(f'[{payload["command"]}] command flags are {payload["flags"]}') +print(f"[{payload['command']}] command flags are {payload['flags']}") -answer = input('What is your name? ') +answer = input("What is your name? 
") -print(f'Hello, {answer}!') +print(f"Hello, {answer}!") diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index a103ba7a8..f7c08ece1 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -78,6 +78,7 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().BoolVar(&createReq.NoCompute, "no-compute", createReq.NoCompute, `If true, the app will not be started after creation.`) // TODO: complex arg: active_deployment // TODO: complex arg: app_status // TODO: complex arg: compute_status diff --git a/cmd/workspace/apps/overrides.go b/cmd/workspace/apps/overrides.go new file mode 100644 index 000000000..e14068717 --- /dev/null +++ b/cmd/workspace/apps/overrides.go @@ -0,0 +1,28 @@ +package apps + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func listOverride(listCmd *cobra.Command, listReq *apps.ListAppsRequest) { + listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "Name"}} {{header "Url"}} {{header "ComputeStatus"}} {{header "DeploymentStatus"}}`) + listCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.Name | green}} {{.Url}} {{if .ComputeStatus}}{{if eq .ComputeStatus.State "ACTIVE"}}{{green "%s" .ComputeStatus.State }}{{else}}{{blue "%s" .ComputeStatus.State}}{{end}}{{end}} {{if .ActiveDeployment}}{{if eq .ActiveDeployment.Status.State "SUCCEEDED"}}{{green "%s" .ActiveDeployment.Status.State }}{{else}}{{blue "%s" .ActiveDeployment.Status.State}}{{end}}{{end}} + {{end}}`) +} + +func listDeploymentsOverride(listDeploymentsCmd *cobra.Command, listDeploymentsReq *apps.ListAppDeploymentsRequest) { + listDeploymentsCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "DeploymentId"}} {{header "State"}} {{header "CreatedAt"}}`) + listDeploymentsCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.DeploymentId}} {{if eq .Status.State "SUCCEEDED"}}{{green "%s" .Status.State }}{{else}}{{blue "%s" .Status.State}}{{end}} {{.CreateTime}} + {{end}}`) +} + +func init() { + listOverrides = append(listOverrides, listOverride) + listDeploymentsOverrides = append(listDeploymentsOverrides, listDeploymentsOverride) +} diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index b067937e2..38a88f014 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -625,11 +625,19 @@ func newGet() *cobra.Command { // TODO: short flags + cmd.Flags().StringVar(&getReq.PageToken, "page-token", getReq.PageToken, `Use next_page_token returned from the previous GetJob to request the next page of the job's sub-resources.`) + cmd.Use = "get JOB_ID" cmd.Short = `Get a single job.` cmd.Long = `Get a single job. Retrieves the details for a single job. + + In Jobs API 2.2, requests for a single job support pagination of tasks and + job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: JOB_ID: The canonical identifier of the job to retrieve information about. 
This @@ -847,13 +855,19 @@ func newGetRun() *cobra.Command { cmd.Flags().BoolVar(&getRunReq.IncludeHistory, "include-history", getRunReq.IncludeHistory, `Whether to include the repair history in the response.`) cmd.Flags().BoolVar(&getRunReq.IncludeResolvedValues, "include-resolved-values", getRunReq.IncludeResolvedValues, `Whether to include resolved parameter values in the response.`) - cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `To list the next page of job tasks, set this field to the value of the next_page_token returned in the GetJob response.`) + cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `Use next_page_token returned from the previous GetRun to request the next page of the run's sub-resources.`) cmd.Use = "get-run RUN_ID" cmd.Short = `Get a single job run.` cmd.Long = `Get a single job run. - Retrieve the metadata of a run. + Retrieves the metadata of a run. + + In Jobs API 2.2, requests for a single job run support pagination of tasks + and job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: RUN_ID: The canonical identifier of the run for which to retrieve the metadata. diff --git a/cmd/workspace/pipelines/pipelines.go b/cmd/workspace/pipelines/pipelines.go index 38636e83b..e94d4c5a8 100755 --- a/cmd/workspace/pipelines/pipelines.go +++ b/cmd/workspace/pipelines/pipelines.go @@ -974,6 +974,7 @@ func newUpdate() *cobra.Command { cmd.Flags().BoolVar(&updateReq.Photon, "photon", updateReq.Photon, `Whether Photon is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.PipelineId, "pipeline-id", updateReq.PipelineId, `Unique identifier for this pipeline.`) // TODO: complex arg: restart_window + // TODO: complex arg: run_as cmd.Flags().StringVar(&updateReq.Schema, "schema", updateReq.Schema, `The default schema (database) where tables are read from or published to.`) cmd.Flags().BoolVar(&updateReq.Serverless, "serverless", updateReq.Serverless, `Whether serverless compute is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.Storage, "storage", updateReq.Storage, `DBFS root directory for storing checkpoints and tables.`) diff --git a/cmd/workspace/shares/shares.go b/cmd/workspace/shares/shares.go index f70963f29..62c3407f4 100755 --- a/cmd/workspace/shares/shares.go +++ b/cmd/workspace/shares/shares.go @@ -391,6 +391,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `User-provided free-form text description.`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the share.`) + cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of current owner of share.`) cmd.Flags().StringVar(&updateReq.StorageRoot, "storage-root", updateReq.StorageRoot, `Storage root URL for the share.`) // TODO: array: updates diff --git a/go.mod b/go.mod index 86bc1c368..930963f89 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,14 @@ go 1.23 toolchain go1.23.4 require ( + github.com/BurntSushi/toml v1.4.0 // MIT github.com/Masterminds/semver/v3 v3.3.1 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.54.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.55.0 // Apache 2.0 github.com/fatih/color v1.18.0 // MIT github.com/google/uuid v1.6.0 // 
BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 - github.com/hashicorp/hc-install v0.9.0 // MPL 2.0 + github.com/hashicorp/hc-install v0.9.1 // MPL 2.0 github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 github.com/hashicorp/terraform-json v0.23.0 // MPL 2.0 github.com/hexops/gotextdiff v1.0.3 // BSD 3-Clause "New" or "Revised" License @@ -26,9 +27,9 @@ require ( github.com/wI2L/jsondiff v0.6.1 // MIT golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/mod v0.22.0 - golang.org/x/oauth2 v0.24.0 + golang.org/x/oauth2 v0.25.0 golang.org/x/sync v0.10.0 - golang.org/x/term v0.27.0 + golang.org/x/term v0.28.0 golang.org/x/text v0.21.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/yaml.v3 v3.0.1 @@ -38,7 +39,7 @@ require ( cloud.google.com/go/auth v0.4.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect - github.com/ProtonMail/go-crypto v1.1.0-alpha.2 // indirect + github.com/ProtonMail/go-crypto v1.1.3 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/cloudflare/circl v1.3.7 // indirect @@ -69,7 +70,7 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sys v0.28.0 // indirect + golang.org/x/sys v0.29.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/go.sum b/go.sum index f6cf79607..d025b3947 100644 --- a/go.sum +++ b/go.sum @@ -8,12 +8,14 @@ cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1h dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= +github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/ProtonMail/go-crypto v1.1.3 h1:nRBOetoydLeUb4nHajyO2bKqMLfWQ/ZPwkXqXxPxCFk= +github.com/ProtonMail/go-crypto v1.1.3/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= github.com/briandowns/spinner v1.23.1 h1:t5fDPmScwUjozhDj4FA46p5acZWIPXYE30qW2Ptu650= @@ -30,10 +32,10 @@ github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vc github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= 
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= -github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.54.0 h1:L8gsA3NXs+uYU3QtW/OUgjxMQxOH24k0MT9JhB3zLlM= -github.com/databricks/databricks-sdk-go v0.54.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= +github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/databricks/databricks-sdk-go v0.55.0 h1:ReziD6spzTDltM0ml80LggKo27F3oUjgTinCFDJDnak= +github.com/databricks/databricks-sdk-go v0.55.0/go.mod h1:JpLizplEs+up9/Z4Xf2x++o3sM9eTTWFGzIXAptKJzI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -50,10 +52,10 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= -github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU= -github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= -github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= -github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= +github.com/go-git/go-billy/v5 v5.6.0 h1:w2hPNtoehvJIxR00Vb4xX94qHQi/ApZfX+nBE2Cjio8= +github.com/go-git/go-billy/v5 v5.6.0/go.mod h1:sFDq7xD3fn3E0GOwUSZqHo9lrkmx8xJhA0ZrfvjBRGM= +github.com/go-git/go-git/v5 v5.13.0 h1:vLn5wlGIh/X78El6r3Jr+30W16Blk0CTcxTYcYPWi5E= +github.com/go-git/go-git/v5 v5.13.0/go.mod h1:Wjo7/JyVKtQgUNdXYXIepzWfJQkUEIGvkvVkiXRR/zw= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -103,8 +105,8 @@ github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISH github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/hc-install v0.9.0 h1:2dIk8LcvANwtv3QZLckxcjyF5w8KVtiMxu6G6eLhghE= -github.com/hashicorp/hc-install v0.9.0/go.mod h1:+6vOP+mf3tuGgMApVYtmsnDoKWMDcFXeTxCACYZ8SFg= +github.com/hashicorp/hc-install v0.9.1 h1:gkqTfE3vVbafGQo6VZXcy2v5yoz2bE0+nhZXruCuODQ= +github.com/hashicorp/hc-install v0.9.1/go.mod h1:pWWvN/IrfeBK4XPeXXYkL6EjMufHkCK5DvwxeLKuBf0= github.com/hashicorp/terraform-exec v0.21.0 h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= github.com/hashicorp/terraform-json v0.23.0 
h1:sniCkExU4iKtTADReHzACkk8fnpQXrdD2xoR+lppBkI= @@ -141,8 +143,8 @@ github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDj github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= -github.com/skeema/knownhosts v1.2.2 h1:Iug2P4fLmDw9f41PB6thxUkNUkJzB5i+1/exaj40L3A= -github.com/skeema/knownhosts v1.2.2/go.mod h1:xYbVRSPxqBZFrdmDyMmsOs+uX1UZC3nTN3ThzgDxUwo= +github.com/skeema/knownhosts v1.3.0 h1:AM+y0rI04VksttfwjkSTNQorvGqmwATnvnAHpSgc0LY= +github.com/skeema/knownhosts v1.3.0/go.mod h1:sPINvnADmT/qYH1kfv+ePMmOBTH6Tbl7b5LvTDjFK7M= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -207,8 +209,8 @@ golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= -golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= +golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -224,10 +226,10 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= +golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= diff --git a/integration/bundle/apps_test.go b/integration/bundle/apps_test.go new file mode 100644 index 000000000..23cd784be --- /dev/null +++ b/integration/bundle/apps_test.go @@ -0,0 +1,139 @@ 
+package bundle_test + +import ( + "fmt" + "io" + "testing" + + "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestDeployBundleWithApp(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + // TODO: should only skip app run when app can be created with no_compute option. + if testing.Short() { + t.Log("Skip the app creation and run in short mode") + return + } + + if testutil.GetCloud(t) == testutil.GCP { + t.Skip("Skipping test for GCP cloud because /api/2.0/apps is temporarily unavailable there.") + } + + uniqueId := uuid.New().String() + appId := "app-" + uuid.New().String()[0:8] + nodeTypeId := testutil.GetCloud(t).NodeTypeID() + instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") + + root := initTestTemplate(t, ctx, "apps", map[string]any{ + "unique_id": uniqueId, + "app_id": appId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + "instance_pool_id": instancePoolId, + }) + + t.Cleanup(func() { + destroyBundle(t, ctx, root) + app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: "test-app"}) + if err != nil { + require.ErrorContains(t, err, "does not exist") + } else { + require.Contains(t, []apps.ApplicationState{apps.ApplicationStateUnavailable}, app.AppStatus.State) + } + }) + + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + + user, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/apps/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/apps/bundle_deploy.txt"), + ) + + // App should exist after bundle deployment + app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + + // Check app config + currentUser, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + pathToAppYml := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s/files/app/app.yml", currentUser.UserName, uniqueId) + reader, err := wt.W.Workspace.Download(ctx, pathToAppYml) + require.NoError(t, err) + + data, err := io.ReadAll(reader) + require.NoError(t, err) + + job, err := wt.W.Jobs.GetBySettingsName(ctx, "test-job-with-cluster-"+uniqueId) + require.NoError(t, err) + + content := string(data) + require.Contains(t, content, fmt.Sprintf(`command: + - flask + - --app + - app + - run +env: + - name: JOB_ID + value: "%d"`, job.JobId)) + + // Try to run the app + _, out := runResourceWithStderr(t, ctx, root, "test_app") + require.Contains(t, out, app.Url) + + // App should be in the running state + app, err = wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateRunning, app.AppStatus.State) + + // Stop the app + wait, err
:= wt.W.Apps.Stop(ctx, apps.StopAppRequest{Name: appId}) + require.NoError(t, err) + app, err = wait.Get() + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateUnavailable, app.AppStatus.State) + + // Try to run the app again + _, out = runResourceWithStderr(t, ctx, root, "test_app") + require.Contains(t, out, app.Url) + + // App should be in the running state + app, err = wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) + require.NoError(t, err) + require.NotNil(t, app) + require.Equal(t, apps.ApplicationStateRunning, app.AppStatus.State) + + // Redeploy it again just to check that it can be redeployed + deployBundle(t, ctx, root) +} diff --git a/integration/bundle/bundles/apps/databricks_template_schema.json b/integration/bundle/bundles/apps/databricks_template_schema.json new file mode 100644 index 000000000..c9faeabf3 --- /dev/null +++ b/integration/bundle/bundles/apps/databricks_template_schema.json @@ -0,0 +1,24 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "app_id": { + "type": "string", + "description": "Unique ID for app name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + }, + "instance_pool_id": { + "type": "string", + "description": "Instance pool id for job cluster" + } + } +} diff --git a/integration/bundle/bundles/apps/template/app/app.py b/integration/bundle/bundles/apps/template/app/app.py new file mode 100644 index 000000000..a60c786fe --- /dev/null +++ b/integration/bundle/bundles/apps/template/app/app.py @@ -0,0 +1,15 @@ +import os + +from databricks.sdk import WorkspaceClient +from flask import Flask + +app = Flask(__name__) + + +@app.route("/") +def home(): + job_id = os.getenv("JOB_ID") + + w = WorkspaceClient() + job = w.jobs.get(job_id) + return job.settings.name diff --git a/integration/bundle/bundles/apps/template/databricks.yml.tmpl b/integration/bundle/bundles/apps/template/databricks.yml.tmpl new file mode 100644 index 000000000..4d862a06f --- /dev/null +++ b/integration/bundle/bundles/apps/template/databricks.yml.tmpl @@ -0,0 +1,42 @@ +bundle: + name: basic + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + apps: + test_app: + name: "{{.app_id}}" + description: "App which manages job created by this bundle" + source_code_path: ./app + config: + command: + - flask + - --app + - app + - run + env: + - name: JOB_ID + value: ${resources.jobs.foo.id} + + resources: + - name: "app-job" + description: "A job for app to be able to work with" + job: + id: ${resources.jobs.foo.id} + permission: "CAN_MANAGE_RUN" + + jobs: + foo: + name: test-job-with-cluster-{{.unique_id}} + tasks: + - task_key: my_notebook_task + new_cluster: + num_workers: 1 + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + data_security_mode: USER_ISOLATION + instance_pool_id: "{{.instance_pool_id}}" + spark_python_task: + python_file: ./hello_world.py diff --git a/integration/bundle/bundles/apps/template/hello_world.py b/integration/bundle/bundles/apps/template/hello_world.py new file mode 100644 index 000000000..f301245e2 --- /dev/null +++ b/integration/bundle/bundles/apps/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl index 
4ebeb2655..4ea687cf1 100644 --- a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -9,7 +9,6 @@ variables: description: The catalog the DLT pipeline should use. default: main - resources: pipelines: foo: @@ -19,6 +18,13 @@ resources: path: ./nb.sql development: true catalog: ${var.catalog} + target: ${resources.schemas.bar.id} + + schemas: + bar: + name: test-schema-{{.unique_id}} + catalog_name: ${var.catalog} + comment: This schema was created from DABs include: - "*.yml" diff --git a/integration/bundle/helpers_test.go b/integration/bundle/helpers_test.go index e884cd8c6..b4f9c9086 100644 --- a/integration/bundle/helpers_test.go +++ b/integration/bundle/helpers_test.go @@ -16,7 +16,6 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" - "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/folders" "github.com/databricks/cli/libs/template" @@ -40,10 +39,19 @@ func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, te cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles") ctx = cmdio.InContext(ctx, cmd) - out, err := filer.NewLocalClient(bundleRoot) + r := template.Resolver{ + TemplatePathOrUrl: templateRoot, + ConfigFile: configFilePath, + OutputDir: bundleRoot, + } + + tmpl, err := r.Resolve(ctx) require.NoError(t, err) - err = template.Materialize(ctx, configFilePath, os.DirFS(templateRoot), out) + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) require.NoError(t, err) + return bundleRoot } @@ -119,6 +127,17 @@ func runResource(t testutil.TestingT, ctx context.Context, path, key string) (st return stdout.String(), err } +func runResourceWithStderr(t testutil.TestingT, ctx context.Context, path, key string) (string, string) { + ctx = env.Set(ctx, "BUNDLE_ROOT", path) + ctx = cmdio.NewContext(ctx, cmdio.Default()) + + c := testcli.NewRunner(t, ctx, "bundle", "run", key) + stdout, stderr, err := c.Run() + require.NoError(t, err) + + return stdout.String(), stderr.String() +} + func runResourceWithParams(t testutil.TestingT, ctx context.Context, path, key string, params ...string) (string, error) { ctx = env.Set(ctx, "BUNDLE_ROOT", path) ctx = cmdio.NewContext(ctx, cmdio.Default()) diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index c93e6b50b..931660032 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -58,7 +58,10 @@ func testDefaultPython(t *testing.T, pythonVersion string) { require.NoError(t, err) require.NotNil(t, user) testdiff.PrepareReplacementsUser(t, replacements, *user) - testdiff.PrepareReplacements(t, replacements, wt.W) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) tmpDir := t.TempDir() testutil.Chdir(t, tmpDir) diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt new file mode 100644 index 000000000..b077f327d --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -0,0 +1,5 @@ +Uploading bundle files to 
/Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ/files... +Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning. +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt new file mode 100644 index 000000000..dc9016a0f --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ + +Validation OK! diff --git a/integration/bundle/testdata/default_python/bundle_init.txt b/integration/bundle/testdata/default_python/bundle_init.txt index 6cfc32f98..c2917ea4e 100644 --- a/integration/bundle/testdata/default_python/bundle_init.txt +++ b/integration/bundle/testdata/default_python/bundle_init.txt @@ -1,6 +1,6 @@ Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): https://$DATABRICKS_HOST +Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): $DATABRICKS_URL ✨ Your new project has been created in the 'project_name_$UNIQUE_PRJ' directory! diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index a0bcfdbc8..318cd2543 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -23,7 +23,7 @@ "resources/project_name_$UNIQUE_PRJ.pipeline.yml" ], "workspace": { - "host": "https://$DATABRICKS_HOST", + "host": "$DATABRICKS_URL", "current_user": { "active": true, "displayName": "$USERNAME", @@ -141,7 +141,7 @@ "unit": "DAYS" } }, - "url": "https://$DATABRICKS_HOST/jobs/?o=" + "url": "$DATABRICKS_URL/jobs/?o=" } }, "pipelines": { @@ -165,7 +165,7 @@ ], "name": "[dev $USERNAME] project_name_$UNIQUE_PRJ_pipeline", "target": "project_name_$UNIQUE_PRJ_dev", - "url": "https://$DATABRICKS_HOST/pipelines/?o=" + "url": "$DATABRICKS_URL/pipelines/?o=" } } }, diff --git a/integration/bundle/testdata/default_python/bundle_validate.txt b/integration/bundle/testdata/default_python/bundle_validate.txt index 88a5fdd18..578fd6494 100644 --- a/integration/bundle/testdata/default_python/bundle_validate.txt +++ b/integration/bundle/testdata/default_python/bundle_validate.txt @@ -1,7 +1,7 @@ Name: project_name_$UNIQUE_PRJ Target: dev Workspace: - Host: https://$DATABRICKS_HOST + Host: $DATABRICKS_URL User: $USERNAME Path: /Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev diff --git a/integration/cmd/fs/cat_test.go b/integration/cmd/fs/cat_test.go index 3e964fe6e..14ec8140e 100644 --- a/integration/cmd/fs/cat_test.go +++ b/integration/cmd/fs/cat_test.go @@ -18,13 +18,11 @@ func TestFsCat(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -40,13 +38,11 @@ func TestFsCatOnADir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t 
*testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "dir1") require.NoError(t, err) @@ -61,13 +57,11 @@ func TestFsCatOnNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cat", path.Join(tmpDir, "non-existent-file")) assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/cp_test.go b/integration/cmd/fs/cp_test.go index 76aef7acf..6d0266555 100644 --- a/integration/cmd/fs/cp_test.go +++ b/integration/cmd/fs/cp_test.go @@ -126,14 +126,12 @@ func TestFsCpDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", sourceDir, targetDir, "--recursive") @@ -147,14 +145,12 @@ func TestFsCpFileToFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), path.Join(targetDir, "bar.txt")) @@ -168,14 +164,12 @@ func TestFsCpFileToDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), targetDir) @@ -205,14 +199,12 @@ func TestFsCpDirToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -231,14 +223,12 @@ func TestFsCpFileToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := 
tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -255,14 +245,12 @@ func TestFsCpFileToFileFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -279,14 +267,12 @@ func TestFsCpDirToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -303,14 +289,12 @@ func TestFsCpFileToFileWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -327,14 +311,12 @@ func TestFsCpFileToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -351,13 +333,11 @@ func TestFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cp", path.Join(tmpDir), path.Join(tmpDir, "foobar")) r := regexp.MustCompile("source path .* is a directory. 
Please specify the --recursive flag") @@ -376,14 +356,12 @@ func TestFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target diff --git a/integration/cmd/fs/ls_test.go b/integration/cmd/fs/ls_test.go index 25929fdf3..0f53193bf 100644 --- a/integration/cmd/fs/ls_test.go +++ b/integration/cmd/fs/ls_test.go @@ -43,13 +43,11 @@ func TestFsLs(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") @@ -77,13 +75,11 @@ func TestFsLsWithAbsolutePaths(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json", "--absolute") @@ -111,13 +107,11 @@ func TestFsLsOnFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "a", "hello.txt"), "--output=json") @@ -131,13 +125,11 @@ func TestFsLsOnEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") assert.Equal(t, "", stderr.String()) @@ -155,13 +147,11 @@ func TestFsLsForNonexistingDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "nonexistent"), "--output=json") assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/mkdir_test.go b/integration/cmd/fs/mkdir_test.go index eff0599a7..5cea0599c 100644 --- a/integration/cmd/fs/mkdir_test.go +++ b/integration/cmd/fs/mkdir_test.go @@ -17,13 +17,11 @@ func TestFsMkdir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create 
directory "a" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a")) @@ -43,13 +41,11 @@ func TestFsMkdirCreatesIntermediateDirectories(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a/b/c" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a", "b", "c")) @@ -81,13 +77,11 @@ func TestFsMkdirWhenDirectoryAlreadyExists(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" err := f.Mkdir(context.Background(), "a") diff --git a/integration/cmd/fs/rm_test.go b/integration/cmd/fs/rm_test.go index 018c7920e..fc19bb5b5 100644 --- a/integration/cmd/fs/rm_test.go +++ b/integration/cmd/fs/rm_test.go @@ -17,14 +17,12 @@ func TestFsRmFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a file ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -48,14 +46,12 @@ func TestFsRmEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -79,14 +75,12 @@ func TestFsRmNonEmptyDirectory(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -110,13 +104,11 @@ func TestFsRmForNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) // Expect error if file does not exist _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "rm", path.Join(tmpDir, "does-not-exist")) @@ -129,13 +121,11 @@ func TestFsRmDirRecursively(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // Create a directory err := f.Mkdir(context.Background(), "a") diff --git a/integration/libs/filer/filer_test.go b/integration/libs/filer/filer_test.go index 21c839e1b..bc1713b30 100644 --- a/integration/libs/filer/filer_test.go +++ b/integration/libs/filer/filer_test.go @@ -128,11 +128,9 @@ func 
TestFilerRecursiveDelete(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -239,11 +237,9 @@ func TestFilerReadWrite(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -348,11 +344,9 @@ func TestFilerReadDir(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() commonFilerReadDirTest(t, ctx, f) diff --git a/integration/libs/locker/locker_test.go b/integration/libs/locker/locker_test.go index 524996465..93cb1ffce 100644 --- a/integration/libs/locker/locker_test.go +++ b/integration/libs/locker/locker_test.go @@ -66,9 +66,8 @@ func TestLock(t *testing.T) { } var wg sync.WaitGroup - for i := range numConcurrentLocks { + for currentIndex := range numConcurrentLocks { wg.Add(1) - currentIndex := i go func() { defer wg.Done() time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) diff --git a/internal/testcli/runner.go b/internal/testcli/runner.go index d32fa3947..f462f44fc 100644 --- a/internal/testcli/runner.go +++ b/internal/testcli/runner.go @@ -39,6 +39,8 @@ type Runner struct { StderrLines <-chan string errch <-chan error + + Verbose bool } func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string { @@ -139,7 +141,9 @@ func (r *Runner) RunBackground() { go func() { err := root.Execute(ctx, cli) if err != nil { - r.Logf("Error running command: %s", err) + if r.Verbose { + r.Logf("Error running command: %s", err) + } } // Close pipes to signal EOF. @@ -154,7 +158,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stdout.Bytes())) for scanner.Scan() { - r.Logf("[databricks stdout]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stdout]: %s", scanner.Text()) + } } } @@ -162,7 +168,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stderr.Bytes())) for scanner.Scan() { - r.Logf("[databricks stderr]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stderr]: %s", scanner.Text()) + } } } @@ -196,18 +204,24 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { cli.SetErr(&stderr) cli.SetArgs(r.args) - r.Logf(" args: %s", strings.Join(r.args, ", ")) + if r.Verbose { + r.Logf(" args: %s", strings.Join(r.args, ", ")) + } err := root.Execute(ctx, cli) if err != nil { - r.Logf(" error: %s", err) + if r.Verbose { + r.Logf(" error: %s", err) + } } if stdout.Len() > 0 { // Make a copy of the buffer such that it remains "unread". 
scanner := bufio.NewScanner(bytes.NewBuffer(stdout.Bytes())) for scanner.Scan() { - r.Logf("stdout: %s", scanner.Text()) + if r.Verbose { + r.Logf("stdout: %s", scanner.Text()) + } } } @@ -215,7 +229,9 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stderr.Bytes())) for scanner.Scan() { - r.Logf("stderr: %s", scanner.Text()) + if r.Verbose { + r.Logf("stderr: %s", scanner.Text()) + } } } @@ -275,8 +291,9 @@ func NewRunner(t testutil.TestingT, ctx context.Context, args ...string) *Runner return &Runner{ TestingT: t, - ctx: ctx, - args: args, + ctx: ctx, + args: args, + Verbose: true, } } diff --git a/libs/auth/env.go b/libs/auth/env.go new file mode 100644 index 000000000..c58cc53e3 --- /dev/null +++ b/libs/auth/env.go @@ -0,0 +1,26 @@ +package auth + +import "github.com/databricks/databricks-sdk-go/config" + +// Env generates the authentication environment variables we need to set for +// downstream applications from the CLI to work correctly. +func Env(cfg *config.Config) map[string]string { + out := make(map[string]string) + for _, attr := range config.ConfigAttributes { + // Ignore profile so that downstream tools don't try and reload + // the profile. We know the current configuration is already valid since + // otherwise the CLI would have thrown an error when loading it. + if attr.Name == "profile" { + continue + } + if len(attr.EnvVars) == 0 { + continue + } + if attr.IsZero(cfg) { + continue + } + out[attr.EnvVars[0]] = attr.GetString(cfg) + } + + return out +} diff --git a/libs/auth/env_test.go b/libs/auth/env_test.go new file mode 100644 index 000000000..be1cfc7ac --- /dev/null +++ b/libs/auth/env_test.go @@ -0,0 +1,42 @@ +package auth + +import ( + "testing" + + "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" +) + +func TestAuthEnv(t *testing.T) { + in := &config.Config{ + Profile: "thisshouldbeignored", + Host: "https://test.com", + Token: "test-token", + Password: "test-password", + MetadataServiceURL: "http://somurl.com", + + AzureUseMSI: true, + AzureTenantID: "test-tenant-id", + AzureClientID: "test-client-id", + AzureClientSecret: "test-client-secret", + + ActionsIDTokenRequestToken: "test-actions-id-token-request-token", + } + + expected := map[string]string{ + "DATABRICKS_HOST": "https://test.com", + "DATABRICKS_TOKEN": "test-token", + "DATABRICKS_PASSWORD": "test-password", + "DATABRICKS_METADATA_SERVICE_URL": "http://somurl.com", + + "ARM_USE_MSI": "true", + "ARM_TENANT_ID": "test-tenant-id", + "ARM_CLIENT_ID": "test-client-id", + "ARM_CLIENT_SECRET": "test-client-secret", + + "ACTIONS_ID_TOKEN_REQUEST_TOKEN": "test-actions-id-token-request-token", + } + + out := Env(in) + assert.Equal(t, expected, out) +} diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go index c0e9e868a..11b75157d 100644 --- a/libs/cmdio/io.go +++ b/libs/cmdio/io.go @@ -285,3 +285,14 @@ func fromContext(ctx context.Context) *cmdIO { } return io } + +// Mocks the context with a cmdio object that discards all output. 
+func MockDiscard(ctx context.Context) context.Context { + return InContext(ctx, &cmdIO{ + interactive: false, + outputFormat: flags.OutputText, + in: io.NopCloser(strings.NewReader("")), + out: io.Discard, + err: io.Discard, + }) +} diff --git a/libs/cmdio/logger.go b/libs/cmdio/logger.go index 7edad5bf0..48b76ce42 100644 --- a/libs/cmdio/logger.go +++ b/libs/cmdio/logger.go @@ -189,7 +189,7 @@ func (l *Logger) writeJson(event Event) { // we panic because there we cannot catch this in jobs.RunNowAndWait panic(err) } - _, _ = l.Writer.Write([]byte(b)) + _, _ = l.Writer.Write(b) _, _ = l.Writer.Write([]byte("\n")) } diff --git a/libs/databrickscfg/cfgpickers/clusters.go b/libs/databrickscfg/cfgpickers/clusters.go index e27d13690..ba920b59b 100644 --- a/libs/databrickscfg/cfgpickers/clusters.go +++ b/libs/databrickscfg/cfgpickers/clusters.go @@ -136,7 +136,18 @@ func loadInteractiveClusters(ctx context.Context, w *databricks.WorkspaceClient, promptSpinner := cmdio.Spinner(ctx) promptSpinner <- "Loading list of clusters to select from" defer close(promptSpinner) - all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{}) + all, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ + // Maximum page size to optimize for load time. + PageSize: 100, + + // Filter out system clusters. + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{ + compute.ClusterSourceApi, + compute.ClusterSourceUi, + }, + }, + }) if err != nil { return nil, fmt.Errorf("list clusters: %w", err) } diff --git a/libs/databrickscfg/cfgpickers/clusters_test.go b/libs/databrickscfg/cfgpickers/clusters_test.go index cde09aa44..840916e91 100644 --- a/libs/databrickscfg/cfgpickers/clusters_test.go +++ b/libs/databrickscfg/cfgpickers/clusters_test.go @@ -1,12 +1,10 @@ package cfgpickers import ( - "bytes" "context" "testing" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/flags" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/qa" "github.com/databricks/databricks-sdk-go/service/compute" @@ -70,7 +68,7 @@ func TestFirstCompatibleCluster(t *testing.T) { cfg, server := qa.HTTPFixtures{ { Method: "GET", - Resource: "/api/2.1/clusters/list?", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100", Response: compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -114,8 +112,8 @@ func TestFirstCompatibleCluster(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, &bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) + clusterID, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.NoError(t, err) require.Equal(t, "bcd-id", clusterID) @@ -125,7 +123,7 @@ func TestNoCompatibleClusters(t *testing.T) { cfg, server := qa.HTTPFixtures{ { Method: "GET", - Resource: "/api/2.1/clusters/list?", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=API&filter_by.cluster_sources=UI&page_size=100", Response: compute.ListClustersResponse{ Clusters: []compute.ClusterDetails{ { @@ -161,8 +159,7 @@ func TestNoCompatibleClusters(t *testing.T) { defer server.Close() w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg))) - ctx := context.Background() - ctx = cmdio.InContext(ctx, cmdio.NewIO(ctx, flags.OutputText, 
&bytes.Buffer{}, &bytes.Buffer{}, &bytes.Buffer{}, "", "...")) + ctx := cmdio.MockDiscard(context.Background()) _, err := AskForCluster(ctx, w, WithDatabricksConnect("13.1")) require.Equal(t, ErrNoCompatibleClusters, err) } diff --git a/libs/diag/diagnostic.go b/libs/diag/diagnostic.go index a4f8c7b6b..0c7699b4e 100644 --- a/libs/diag/diagnostic.go +++ b/libs/diag/diagnostic.go @@ -86,6 +86,16 @@ func Infof(format string, args ...any) Diagnostics { } } +// Recommendationf creates a new recommendation diagnostic. +func Recommendationf(format string, args ...any) Diagnostics { + return []Diagnostic{ + { + Severity: Recommendation, + Summary: fmt.Sprintf(format, args...), + }, + } +} + // Diagnostics holds zero or more instances of [Diagnostic]. type Diagnostics []Diagnostic diff --git a/libs/dyn/merge/elements_by_key.go b/libs/dyn/merge/elements_by_key.go index e6e640d14..df393003a 100644 --- a/libs/dyn/merge/elements_by_key.go +++ b/libs/dyn/merge/elements_by_key.go @@ -7,7 +7,7 @@ type elementsByKey struct { keyFunc func(dyn.Value) string } -func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { +func (e elementsByKey) doMap(_ dyn.Path, v dyn.Value, mergeFunc func(a, b dyn.Value) (dyn.Value, error)) (dyn.Value, error) { // We know the type of this value is a sequence. // For additional defence, return self if it is not. elements, ok := v.AsSequence() @@ -33,7 +33,7 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { } // Merge this instance into the reference. - nv, err := Merge(ref, elements[i]) + nv, err := mergeFunc(ref, elements[i]) if err != nil { return v, err } @@ -55,6 +55,26 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { return dyn.NewValue(out, v.Locations()), nil } +func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { + return e.doMap(nil, v, Merge) +} + +func (e elementsByKey) MapWithOverride(p dyn.Path, v dyn.Value) (dyn.Value, error) { + return e.doMap(nil, v, func(a, b dyn.Value) (dyn.Value, error) { + return Override(a, b, OverrideVisitor{ + VisitInsert: func(_ dyn.Path, v dyn.Value) (dyn.Value, error) { + return v, nil + }, + VisitDelete: func(valuePath dyn.Path, left dyn.Value) error { + return nil + }, + VisitUpdate: func(_ dyn.Path, a, b dyn.Value) (dyn.Value, error) { + return b, nil + }, + }) + }) +} + // ElementsByKey returns a [dyn.MapFunc] that operates on a sequence // where each element is a map. It groups elements by a key and merges // elements with the same key. 
@@ -65,3 +85,7 @@ func (e elementsByKey) Map(_ dyn.Path, v dyn.Value) (dyn.Value, error) { func ElementsByKey(key string, keyFunc func(dyn.Value) string) dyn.MapFunc { return elementsByKey{key, keyFunc}.Map } + +func ElementsByKeyWithOverride(key string, keyFunc func(dyn.Value) string) dyn.MapFunc { + return elementsByKey{key, keyFunc}.MapWithOverride +} diff --git a/libs/dyn/merge/elements_by_key_test.go b/libs/dyn/merge/elements_by_key_test.go index ef316cc66..09efece07 100644 --- a/libs/dyn/merge/elements_by_key_test.go +++ b/libs/dyn/merge/elements_by_key_test.go @@ -50,3 +50,42 @@ func TestElementByKey(t *testing.T) { }, ) } + +func TestElementByKeyWithOverride(t *testing.T) { + vin := dyn.V([]dyn.Value{ + dyn.V(map[string]dyn.Value{ + "key": dyn.V("foo"), + "value": dyn.V(42), + }), + dyn.V(map[string]dyn.Value{ + "key": dyn.V("bar"), + "value": dyn.V(43), + }), + dyn.V(map[string]dyn.Value{ + "key": dyn.V("foo"), + "othervalue": dyn.V(44), + }), + }) + + keyFunc := func(v dyn.Value) string { + return strings.ToLower(v.MustString()) + } + + vout, err := dyn.MapByPath(vin, dyn.EmptyPath, ElementsByKeyWithOverride("key", keyFunc)) + require.NoError(t, err) + assert.Len(t, vout.MustSequence(), 2) + assert.Equal(t, + vout.Index(0).AsAny(), + map[string]any{ + "key": "foo", + "othervalue": 44, + }, + ) + assert.Equal(t, + vout.Index(1).AsAny(), + map[string]any{ + "key": "bar", + "value": 43, + }, + ) +} diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 0a867375d..a33ecd38e 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -81,7 +81,7 @@ func (v Value) AsInt() (int64, bool) { case int32: return int64(vv), true case int64: - return int64(vv), true + return vv, true default: return 0, false } diff --git a/libs/exec/exec_test.go b/libs/exec/exec_test.go index c363c1f7c..f245f9dd1 100644 --- a/libs/exec/exec_test.go +++ b/libs/exec/exec_test.go @@ -85,7 +85,7 @@ func testExecutorWithShell(t *testing.T, shell string) { // Create temporary directory with only the shell executable in the PATH. tmpDir := t.TempDir() - t.Setenv("PATH", tmpDir) + t.Setenv("PATH", fmt.Sprintf("%s%c%s", tmpDir, os.PathListSeparator, os.Getenv("PATH"))) if runtime.GOOS == "windows" { err = os.Symlink(p, fmt.Sprintf("%s/%s.exe", tmpDir, shell)) require.NoError(t, err) diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 98a534684..88bbadd32 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -303,8 +303,6 @@ func (w *FilesClient) recursiveDelete(ctx context.Context, name string) error { group.SetLimit(maxFilesRequestsInFlight) for _, file := range filesToDelete { - file := file - // Skip the file if the context has already been cancelled. 
select { case <-groupCtx.Done(): diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 9ee2722e1..0127d180c 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -16,7 +16,7 @@ import ( "github.com/databricks/databricks-sdk-go/service/workspace" ) -type workspaceFilesExtensionsClient struct { +type WorkspaceFilesExtensionsClient struct { workspaceClient *databricks.WorkspaceClient wsfs Filer @@ -32,7 +32,7 @@ type workspaceFileStatus struct { nameForWorkspaceAPI string } -func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) stat(ctx context.Context, name string) (wsfsFileInfo, error) { info, err := w.wsfs.Stat(ctx, name) if err != nil { return wsfsFileInfo{}, err @@ -42,7 +42,7 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. -func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { ext := path.Ext(name) nameWithoutExt := strings.TrimSuffix(name, ext) @@ -104,7 +104,7 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex }, nil } -func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { +func (w *WorkspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx context.Context, name string) (*workspaceFileStatus, error) { stat, err := w.stat(ctx, name) if err != nil { return nil, err @@ -184,7 +184,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin filer = newWorkspaceFilesReadaheadCache(filer) } - return &workspaceFilesExtensionsClient{ + return &WorkspaceFilesExtensionsClient{ workspaceClient: w, wsfs: filer, @@ -193,7 +193,7 @@ func newWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root strin }, nil } -func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { +func (w *WorkspaceFilesExtensionsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { entries, err := w.wsfs.ReadDir(ctx, name) if err != nil { return nil, err @@ -235,7 +235,7 @@ func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name strin // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. -func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { +func (w *WorkspaceFilesExtensionsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { if w.readonly { return ReadOnlyError{"write"} } @@ -244,7 +244,7 @@ func (w *workspaceFilesExtensionsClient) Write(ctx context.Context, name string, } // Try to read the file as a regular file. If the file is not found, try to read it as a notebook. 
-func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { +func (w *WorkspaceFilesExtensionsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { // Ensure that the file / notebook exists. We do this check here to avoid reading // the content of a notebook called `foo` when the user actually wanted // to read the content of a file called `foo`. @@ -283,7 +283,7 @@ func (w *workspaceFilesExtensionsClient) Read(ctx context.Context, name string) } // Try to delete the file as a regular file. If the file is not found, try to delete it as a notebook. -func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { +func (w *WorkspaceFilesExtensionsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { if w.readonly { return ReadOnlyError{"delete"} } @@ -320,7 +320,7 @@ func (w *workspaceFilesExtensionsClient) Delete(ctx context.Context, name string } // Try to stat the file as a regular file. If the file is not found, try to stat it as a notebook. -func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { +func (w *WorkspaceFilesExtensionsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { info, err := w.wsfs.Stat(ctx, name) // If the file is not found, it might be a notebook. @@ -361,7 +361,7 @@ func (w *workspaceFilesExtensionsClient) Stat(ctx context.Context, name string) // Note: The import API returns opaque internal errors for namespace clashes // (e.g. a file and a notebook or a directory and a notebook). Thus users of this // method should be careful to avoid such clashes. -func (w *workspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { +func (w *WorkspaceFilesExtensionsClient) Mkdir(ctx context.Context, name string) error { if w.readonly { return ReadOnlyError{"mkdir"} } diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 10a2bebf0..9ea837fa9 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -181,7 +181,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { root: NewWorkspaceRootPath("/dir"), } - workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{ + workspaceFilesExtensionsClient := WorkspaceFilesExtensionsClient{ workspaceClient: mockedWorkspaceClient.WorkspaceClient, wsfs: &workspaceFilesClient, } diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 40c850945..579cc1de3 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -47,7 +47,7 @@ func (f file) close() error { func (f file) readHeader() (string, error) { // Scan header line with some padding. 
buf := make([]byte, headerLength) - n, err := f.f.Read([]byte(buf)) + n, err := f.f.Read(buf) if err != nil && err != io.EOF { return "", err } diff --git a/libs/notebook/testdata/.ruff.toml b/libs/notebook/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/notebook/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/sync/testdata/.ruff.toml b/libs/sync/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/sync/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/template/builtin.go b/libs/template/builtin.go index dcb3a8858..5b10534ef 100644 --- a/libs/template/builtin.go +++ b/libs/template/builtin.go @@ -8,14 +8,14 @@ import ( //go:embed all:templates var builtinTemplates embed.FS -// BuiltinTemplate represents a template that is built into the CLI. -type BuiltinTemplate struct { +// builtinTemplate represents a template that is built into the CLI. +type builtinTemplate struct { Name string FS fs.FS } -// Builtin returns the list of all built-in templates. -func Builtin() ([]BuiltinTemplate, error) { +// builtin returns the list of all built-in templates. +func builtin() ([]builtinTemplate, error) { templates, err := fs.Sub(builtinTemplates, "templates") if err != nil { return nil, err @@ -26,7 +26,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - var out []BuiltinTemplate + var out []builtinTemplate for _, entry := range entries { if !entry.IsDir() { continue @@ -37,7 +37,7 @@ func Builtin() ([]BuiltinTemplate, error) { return nil, err } - out = append(out, BuiltinTemplate{ + out = append(out, builtinTemplate{ Name: entry.Name(), FS: templateFS, }) diff --git a/libs/template/builtin_test.go b/libs/template/builtin_test.go index 79e04cb84..162a227ea 100644 --- a/libs/template/builtin_test.go +++ b/libs/template/builtin_test.go @@ -9,12 +9,12 @@ import ( ) func TestBuiltin(t *testing.T) { - out, err := Builtin() + out, err := builtin() require.NoError(t, err) assert.GreaterOrEqual(t, len(out), 3) // Create a map of templates by name for easier lookup - templates := make(map[string]*BuiltinTemplate) + templates := make(map[string]*builtinTemplate) for _, tmpl := range out { templates[tmpl.Name] = &tmpl } diff --git a/libs/template/materialize.go b/libs/template/materialize.go deleted file mode 100644 index 86a6a8c37..000000000 --- a/libs/template/materialize.go +++ /dev/null @@ -1,94 +0,0 @@ -package template - -import ( - "context" - "errors" - "fmt" - "io/fs" - - "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/libs/filer" -) - -const ( - libraryDirName = "library" - templateDirName = "template" - schemaFileName = "databricks_template_schema.json" -) - -// This function materializes the input templates as a project, using user defined -// configurations. -// Parameters: -// -// ctx: context containing a cmdio object. 
This is used to prompt the user -// configFilePath: file path containing user defined config values -// templateFS: root of the template definition -// outputFiler: filer to use for writing the initialized template -func Materialize(ctx context.Context, configFilePath string, templateFS fs.FS, outputFiler filer.Filer) error { - if _, err := fs.Stat(templateFS, schemaFileName); errors.Is(err, fs.ErrNotExist) { - return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) - } - - config, err := newConfig(ctx, templateFS, schemaFileName) - if err != nil { - return err - } - - // Read and assign config values from file - if configFilePath != "" { - err = config.assignValuesFromFile(configFilePath) - if err != nil { - return err - } - } - - helpers := loadHelpers(ctx) - r, err := newRenderer(ctx, config.values, helpers, templateFS, templateDirName, libraryDirName) - if err != nil { - return err - } - - // Print welcome message - welcome := config.schema.WelcomeMessage - if welcome != "" { - welcome, err = r.executeTemplate(welcome) - if err != nil { - return err - } - cmdio.LogString(ctx, welcome) - } - - // Prompt user for any missing config values. Assign default values if - // terminal is not TTY - err = config.promptOrAssignDefaultValues(r) - if err != nil { - return err - } - err = config.validate() - if err != nil { - return err - } - - // Walk and render the template, since input configuration is complete - err = r.walk() - if err != nil { - return err - } - - err = r.persistToDisk(ctx, outputFiler) - if err != nil { - return err - } - - success := config.schema.SuccessMessage - if success == "" { - cmdio.LogString(ctx, "✨ Successfully initialized template") - } else { - success, err = r.executeTemplate(success) - if err != nil { - return err - } - cmdio.LogString(ctx, success) - } - return nil -} diff --git a/libs/template/materialize_test.go b/libs/template/materialize_test.go deleted file mode 100644 index c9331b43f..000000000 --- a/libs/template/materialize_test.go +++ /dev/null @@ -1,23 +0,0 @@ -package template - -import ( - "context" - "os" - "testing" - - "github.com/databricks/cli/cmd/root" - "github.com/databricks/databricks-sdk-go" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestMaterializeForNonTemplateDirectory(t *testing.T) { - tmpDir := t.TempDir() - w, err := databricks.NewWorkspaceClient(&databricks.Config{}) - require.NoError(t, err) - ctx := root.SetWorkspaceClient(context.Background(), w) - - // Try to materialize a non-template directory. - err = Materialize(ctx, "", os.DirFS(tmpDir), nil) - assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at "+schemaFileName) -} diff --git a/libs/template/reader.go b/libs/template/reader.go new file mode 100644 index 000000000..8e32a75cf --- /dev/null +++ b/libs/template/reader.go @@ -0,0 +1,119 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" +) + +type Reader interface { + // FS returns a file system that contains the template + // definition files. + FS(ctx context.Context) (fs.FS, error) + + // Cleanup releases any resources associated with the reader + // like cleaning up temporary directories. 
+ Cleanup(ctx context.Context) +} + +type builtinReader struct { + name string +} + +func (r *builtinReader) FS(ctx context.Context) (fs.FS, error) { + builtin, err := builtin() + if err != nil { + return nil, err + } + + for _, entry := range builtin { + if entry.Name == r.name { + return entry.FS, nil + } + } + + return nil, fmt.Errorf("builtin template %s not found", r.name) +} + +func (r *builtinReader) Cleanup(ctx context.Context) {} + +type gitReader struct { + gitUrl string + // tag or branch to checkout + ref string + // subdirectory within the repository that contains the template + templateDir string + // temporary directory where the repository is cloned + tmpRepoDir string + + // Function to clone the repository. This is a function pointer to allow + // mocking in tests. + cloneFunc func(ctx context.Context, url, reference, targetPath string) error +} + +// Computes the repo name from the repo URL. Treats the last non empty word +// when splitting at '/' as the repo name. For example: for url git@github.com:databricks/cli.git +// the name would be "cli.git" +func repoName(url string) string { + parts := strings.Split(strings.TrimRight(url, "/"), "/") + return parts[len(parts)-1] +} + +func (r *gitReader) FS(ctx context.Context) (fs.FS, error) { + // Calling FS twice will lead to two downloaded copies of the git repo. + // In the future if you need to call FS twice, consider adding some caching + // logic here to avoid multiple downloads. + if r.tmpRepoDir != "" { + return nil, errors.New("FS called twice on git reader") + } + + // Create a temporary directory with the name of the repository. The '*' + // character is replaced by a random string in the generated temporary directory. + repoDir, err := os.MkdirTemp("", repoName(r.gitUrl)+"-*") + if err != nil { + return nil, err + } + r.tmpRepoDir = repoDir + + // start the spinner + promptSpinner := cmdio.Spinner(ctx) + promptSpinner <- "Downloading the template\n" + + err = r.cloneFunc(ctx, r.gitUrl, r.ref, repoDir) + close(promptSpinner) + if err != nil { + return nil, err + } + + return os.DirFS(filepath.Join(repoDir, r.templateDir)), nil +} + +func (r *gitReader) Cleanup(ctx context.Context) { + if r.tmpRepoDir == "" { + return + } + + // Cleanup is best effort. Only log errors. 
+ err := os.RemoveAll(r.tmpRepoDir) + if err != nil { + log.Debugf(ctx, "Error cleaning up tmp directory %s for git template reader for URL %s: %s", r.tmpRepoDir, r.gitUrl, err) + } +} + +type localReader struct { + // Path on the local filesystem that contains the template + path string +} + +func (r *localReader) FS(ctx context.Context) (fs.FS, error) { + return os.DirFS(r.path), nil +} + +func (r *localReader) Cleanup(ctx context.Context) {} diff --git a/libs/template/reader_test.go b/libs/template/reader_test.go new file mode 100644 index 000000000..89c8d9fdf --- /dev/null +++ b/libs/template/reader_test.go @@ -0,0 +1,101 @@ +package template + +import ( + "context" + "io/fs" + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBuiltInReader(t *testing.T) { + exists := []string{ + "default-python", + "default-sql", + "dbt-sql", + "experimental-jobs-as-code", + } + + for _, name := range exists { + t.Run(name, func(t *testing.T) { + r := &builtinReader{name: name} + fsys, err := r.FS(context.Background()) + assert.NoError(t, err) + assert.NotNil(t, fsys) + + // Assert file content returned is accurate and every template has a welcome + // message defined. + b, err := fs.ReadFile(fsys, "databricks_template_schema.json") + require.NoError(t, err) + assert.Contains(t, string(b), "welcome_message") + }) + } + + t.Run("doesnotexist", func(t *testing.T) { + r := &builtinReader{name: "doesnotexist"} + _, err := r.FS(context.Background()) + assert.EqualError(t, err, "builtin template doesnotexist not found") + }) +} + +func TestGitUrlReader(t *testing.T) { + ctx := cmdio.MockDiscard(context.Background()) + + var args []string + numCalls := 0 + cloneFunc := func(ctx context.Context, url, reference, targetPath string) error { + numCalls++ + args = []string{url, reference, targetPath} + testutil.WriteFile(t, filepath.Join(targetPath, "a", "b", "c", "somefile"), "somecontent") + return nil + } + r := &gitReader{ + gitUrl: "someurl", + cloneFunc: cloneFunc, + ref: "sometag", + templateDir: "a/b/c", + } + + // Assert cloneFunc is called with the correct args. + fsys, err := r.FS(ctx) + require.NoError(t, err) + require.NotEmpty(t, r.tmpRepoDir) + assert.Equal(t, 1, numCalls) + assert.DirExists(t, r.tmpRepoDir) + assert.Equal(t, []string{"someurl", "sometag", r.tmpRepoDir}, args) + + // Assert the fs returned is rooted at the templateDir. + b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) + + // Assert second call to FS returns an error. + _, err = r.FS(ctx) + assert.ErrorContains(t, err, "FS called twice on git reader") + + // Assert the downloaded repository is cleaned up. + _, err = fs.Stat(fsys, ".") + require.NoError(t, err) + r.Cleanup(ctx) + _, err = fs.Stat(fsys, ".") + assert.ErrorIs(t, err, fs.ErrNotExist) +} + +func TestLocalReader(t *testing.T) { + tmpDir := t.TempDir() + testutil.WriteFile(t, filepath.Join(tmpDir, "somefile"), "somecontent") + ctx := context.Background() + + r := &localReader{path: tmpDir} + fsys, err := r.FS(ctx) + require.NoError(t, err) + + // Assert the fs returned is rooted at correct location. 
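The three readers share a small lifecycle: FS resolves the template file system (cloning the repository in the gitReader case), and Cleanup releases any temporary state. Since the concrete readers are unexported, the sketch below is written as if it lived inside the template package, mirroring the tests in reader_test.go; it is illustrative only:

```go
package template

import (
	"context"
	"io/fs"
)

// exampleReaderLifecycle is a hypothetical helper illustrating the Reader
// contract; it is not part of the change.
func exampleReaderLifecycle(ctx context.Context) error {
	var r Reader = &builtinReader{name: "default-python"}
	// Cleanup is a no-op for builtin and local readers; for gitReader it removes
	// the temporary clone directory.
	defer r.Cleanup(ctx)

	fsys, err := r.FS(ctx)
	if err != nil {
		return err
	}

	// Every template ships a schema file at its root; the writer checks for it
	// before rendering.
	_, err = fs.ReadFile(fsys, "databricks_template_schema.json")
	return err
}
```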
+ b, err := fs.ReadFile(fsys, "somefile") + require.NoError(t, err) + assert.Equal(t, "somecontent", string(b)) +} diff --git a/libs/template/resolver.go b/libs/template/resolver.go new file mode 100644 index 000000000..2cc8bf1c7 --- /dev/null +++ b/libs/template/resolver.go @@ -0,0 +1,122 @@ +package template + +import ( + "context" + "errors" + "strings" + + "github.com/databricks/cli/libs/git" +) + +var gitUrlPrefixes = []string{ + "https://", + "git@", +} + +func isRepoUrl(url string) bool { + result := false + for _, prefix := range gitUrlPrefixes { + if strings.HasPrefix(url, prefix) { + result = true + break + } + } + return result +} + +type Resolver struct { + // One of the following three: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // 3. Name of a built-in template. + TemplatePathOrUrl string + + // Path to a JSON file containing the configuration values to be used for + // template initialization. + ConfigFile string + + // Directory to write the initialized template to. + OutputDir string + + // Directory path within a Git repository containing the template. + TemplateDir string + + // Git tag or branch to download the template from. Only one of these can be + // specified. + Tag string + Branch string +} + +// ErrCustomSelected is returned when the user selects the "custom..." option +// in the prompt UI when they run `databricks bundle init`. This error signals +// the upstream callsite to show documentation to the user on how to use a custom +// template. +var ErrCustomSelected = errors.New("custom template selected") + +// Configures the reader and the writer for template and returns +// a handle to the template. +// Prompts the user if needed. +func (r Resolver) Resolve(ctx context.Context) (*Template, error) { + if r.Tag != "" && r.Branch != "" { + return nil, errors.New("only one of tag or branch can be specified") + } + + // Git ref to use for template initialization + ref := r.Branch + if r.Tag != "" { + ref = r.Tag + } + + var err error + var templateName TemplateName + + if r.TemplatePathOrUrl == "" { + // Prompt the user to select a template + // if a template path or URL is not provided. + templateName, err = SelectTemplate(ctx) + if err != nil { + return nil, err + } + } else { + templateName = TemplateName(r.TemplatePathOrUrl) + } + + tmpl := GetDatabricksTemplate(templateName) + + // If we could not find a databricks template with the name provided by the user, + // then we assume that the user provided us with a reference to a custom template. + // + // This reference could be one of: + // 1. Path to a local template directory. + // 2. URL to a Git repository containing a template. + // + // We resolve the appropriate reader according to the reference provided by the user. + if tmpl == nil { + tmpl = &Template{ + name: Custom, + // We use a writer that does not log verbose telemetry for custom templates. + // This is important because template definitions can contain PII that we + // do not want to centralize. 
+ Writer: &defaultWriter{}, + } + + if isRepoUrl(r.TemplatePathOrUrl) { + tmpl.Reader = &gitReader{ + gitUrl: r.TemplatePathOrUrl, + ref: ref, + templateDir: r.TemplateDir, + cloneFunc: git.Clone, + } + } else { + tmpl.Reader = &localReader{ + path: r.TemplatePathOrUrl, + } + } + } + err = tmpl.Writer.Configure(ctx, r.ConfigFile, r.OutputDir) + if err != nil { + return nil, err + } + + return tmpl, nil +} diff --git a/libs/template/resolver_test.go b/libs/template/resolver_test.go new file mode 100644 index 000000000..1dee1c45f --- /dev/null +++ b/libs/template/resolver_test.go @@ -0,0 +1,110 @@ +package template + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/cmdio" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTemplateResolverBothTagAndBranch(t *testing.T) { + r := Resolver{ + Tag: "tag", + Branch: "branch", + } + + _, err := r.Resolve(context.Background()) + assert.EqualError(t, err, "only one of tag or branch can be specified") +} + +func TestTemplateResolverErrorsWhenPromptingIsNotSupported(t *testing.T) { + r := Resolver{} + ctx := cmdio.MockDiscard(context.Background()) + + _, err := r.Resolve(ctx) + assert.EqualError(t, err, "prompting is not supported. Please specify the path, name or URL of the template to use") +} + +func TestTemplateResolverForDefaultTemplates(t *testing.T) { + for _, name := range []string{ + "default-python", + "default-sql", + "dbt-sql", + } { + t.Run(name, func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: name, + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, &builtinReader{name: name}, tmpl.Reader) + assert.IsType(t, &writerWithFullTelemetry{}, tmpl.Writer) + }) + } + + t.Run("mlops-stacks", func(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "mlops-stacks", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + // Assert reader and writer configuration + assert.Equal(t, "https://github.com/databricks/mlops-stacks", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "/config/file", tmpl.Writer.(*writerWithFullTelemetry).configPath) + }) +} + +func TestTemplateResolverForCustomUrl(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "https://www.example.com/abc", + Tag: "tag", + TemplateDir: "/template/dir", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "https://www.example.com/abc", tmpl.Reader.(*gitReader).gitUrl) + assert.Equal(t, "tag", tmpl.Reader.(*gitReader).ref) + assert.Equal(t, "/template/dir", tmpl.Reader.(*gitReader).templateDir) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestTemplateResolverForCustomPath(t *testing.T) { + r := Resolver{ + TemplatePathOrUrl: "/custom/path", + ConfigFile: "/config/file", + } + + tmpl, err := r.Resolve(context.Background()) + require.NoError(t, err) + + assert.Equal(t, Custom, tmpl.name) + + // Assert reader configuration + assert.Equal(t, "/custom/path", tmpl.Reader.(*localReader).path) + + // Assert writer configuration + assert.Equal(t, "/config/file", tmpl.Writer.(*defaultWriter).configPath) +} + +func TestBundleInitIsRepoUrl(t *testing.T) { + assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) + assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) + + 
assert.False(t, isRepoUrl("./local")) + assert.False(t, isRepoUrl("foo")) +} diff --git a/libs/template/template.go b/libs/template/template.go new file mode 100644 index 000000000..44834436b --- /dev/null +++ b/libs/template/template.go @@ -0,0 +1,140 @@ +package template + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/git" +) + +type Template struct { + Reader Reader + Writer Writer + + name TemplateName + description string + aliases []string + hidden bool +} + +type TemplateName string + +const ( + DefaultPython TemplateName = "default-python" + DefaultSql TemplateName = "default-sql" + DbtSql TemplateName = "dbt-sql" + MlopsStacks TemplateName = "mlops-stacks" + DefaultPydabs TemplateName = "default-pydabs" + Custom TemplateName = "custom" + ExperimentalJobsAsCode TemplateName = "experimental-jobs-as-code" +) + +var databricksTemplates = []Template{ + { + name: DefaultPython, + description: "The default Python template for Notebooks / Delta Live Tables / Workflows", + Reader: &builtinReader{name: string(DefaultPython)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultSql, + description: "The default SQL template for .sql files that run with Databricks SQL", + Reader: &builtinReader{name: string(DefaultSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DbtSql, + description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", + Reader: &builtinReader{name: string(DbtSql)}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: MlopsStacks, + description: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)", + aliases: []string{"mlops-stack"}, + Reader: &gitReader{gitUrl: "https://github.com/databricks/mlops-stacks", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: DefaultPydabs, + hidden: true, + description: "The default PyDABs template", + Reader: &gitReader{gitUrl: "https://databricks.github.io/workflows-authoring-toolkit/pydabs-template.git", cloneFunc: git.Clone}, + Writer: &writerWithFullTelemetry{}, + }, + { + name: ExperimentalJobsAsCode, + hidden: true, + description: "Jobs as code template (experimental)", + Reader: &builtinReader{name: string(ExperimentalJobsAsCode)}, + Writer: &writerWithFullTelemetry{}, + }, +} + +func HelpDescriptions() string { + var lines []string + for _, template := range databricksTemplates { + if template.name != Custom && !template.hidden { + lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description)) + } + } + return strings.Join(lines, "\n") +} + +var customTemplateDescription = "Bring your own template" + +func options() []cmdio.Tuple { + names := make([]cmdio.Tuple, 0, len(databricksTemplates)) + for _, template := range databricksTemplates { + if template.hidden { + continue + } + tuple := cmdio.Tuple{ + Name: string(template.name), + Id: template.description, + } + names = append(names, tuple) + } + + names = append(names, cmdio.Tuple{ + Name: "custom...", + Id: customTemplateDescription, + }) + return names +} + +func SelectTemplate(ctx context.Context) (TemplateName, error) { + if !cmdio.IsPromptSupported(ctx) { + return "", errors.New("prompting is not supported. 
Please specify the path, name or URL of the template to use") + } + description, err := cmdio.SelectOrdered(ctx, options(), "Template to use") + if err != nil { + return "", err + } + + if description == customTemplateDescription { + return TemplateName(""), ErrCustomSelected + } + + for _, template := range databricksTemplates { + if template.description == description { + return template.name, nil + } + } + + return "", fmt.Errorf("template with description %s not found", description) +} + +func GetDatabricksTemplate(name TemplateName) *Template { + for _, template := range databricksTemplates { + if template.name == name || slices.Contains(template.aliases, string(name)) { + return &template + } + } + + return nil +} diff --git a/cmd/bundle/init_test.go b/libs/template/template_test.go similarity index 59% rename from cmd/bundle/init_test.go rename to libs/template/template_test.go index 475b2e149..80391e58b 100644 --- a/cmd/bundle/init_test.go +++ b/libs/template/template_test.go @@ -1,4 +1,4 @@ -package bundle +package template import ( "testing" @@ -7,12 +7,23 @@ import ( "github.com/stretchr/testify/assert" ) -func TestBundleInitIsRepoUrl(t *testing.T) { - assert.True(t, isRepoUrl("git@github.com:databricks/cli.git")) - assert.True(t, isRepoUrl("https://github.com/databricks/cli.git")) +func TestTemplateHelpDescriptions(t *testing.T) { + expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` + assert.Equal(t, expected, HelpDescriptions()) +} - assert.False(t, isRepoUrl("./local")) - assert.False(t, isRepoUrl("foo")) +func TestTemplateOptions(t *testing.T) { + expected := []cmdio.Tuple{ + {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, + {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, + {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, + {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, + {Name: "custom...", Id: "Bring your own template"}, + } + assert.Equal(t, expected, options()) } func TestBundleInitRepoName(t *testing.T) { @@ -27,28 +38,41 @@ func TestBundleInitRepoName(t *testing.T) { assert.Equal(t, "www.github.com", repoName("https://www.github.com")) } -func TestNativeTemplateOptions(t *testing.T) { - expected := []cmdio.Tuple{ - {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, - {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, - {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, - {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, - {Name: "custom...", Id: "Bring your own template"}, +func TestTemplateTelemetryIsCapturedForAllDefaultTemplates(t *testing.T) { + for _, tmpl := range databricksTemplates { + w := tmpl.Writer + + // Assert telemetry is captured for all databricks templates, i.e. templates + // owned by databricks. 
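Taken together, Resolver.Resolve wires up a Reader/Writer pair for a built-in name, a Git URL, or a local path (prompting when no template is given), and the caller then materializes the template and cleans up the reader. A sketch of that flow from outside the package; the config and output paths are placeholders, and the context is assumed to carry whatever cmdio and workspace state the writer needs:

```go
package main

import (
	"context"
	"errors"
	"log"

	"github.com/databricks/cli/libs/template"
)

func main() {
	ctx := context.Background()

	r := template.Resolver{
		// Leave empty to prompt, or pass a built-in name, local path, or Git URL.
		TemplatePathOrUrl: "",
		ConfigFile:        "/path/to/config.json",
		OutputDir:         "/path/to/output",
	}

	tmpl, err := r.Resolve(ctx)
	if errors.Is(err, template.ErrCustomSelected) {
		// The user chose "custom..."; the caller is expected to point them at
		// the documentation for custom templates instead.
		log.Fatal(err)
	}
	if err != nil {
		log.Fatal(err)
	}
	defer tmpl.Reader.Cleanup(ctx)

	// Prompts for any missing inputs, renders the template, and writes the
	// result through the filer configured from OutputDir.
	if err := tmpl.Writer.Materialize(ctx, tmpl.Reader); err != nil {
		log.Fatal(err)
	}
}
```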
+ assert.IsType(t, &writerWithFullTelemetry{}, w) } - assert.Equal(t, expected, nativeTemplateOptions()) } -func TestNativeTemplateHelpDescriptions(t *testing.T) { - expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows -- default-sql: The default SQL template for .sql files that run with Databricks SQL -- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) -- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` - assert.Equal(t, expected, nativeTemplateHelpDescriptions()) -} +func TestTemplateGetDatabricksTemplate(t *testing.T) { + names := []TemplateName{ + DefaultPython, + DefaultSql, + DbtSql, + MlopsStacks, + DefaultPydabs, + } -func TestGetUrlForNativeTemplate(t *testing.T) { - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stacks")) - assert.Equal(t, "https://github.com/databricks/mlops-stacks", getUrlForNativeTemplate("mlops-stack")) - assert.Equal(t, "", getUrlForNativeTemplate("default-python")) - assert.Equal(t, "", getUrlForNativeTemplate("invalid")) + for _, name := range names { + tmpl := GetDatabricksTemplate(name) + assert.Equal(t, tmpl.name, name) + } + + notExist := []string{ + "/some/path", + "doesnotexist", + "https://www.someurl.com", + } + + for _, name := range notExist { + tmpl := GetDatabricksTemplate(TemplateName(name)) + assert.Nil(t, tmpl) + } + + // Assert the alias works. + assert.Equal(t, MlopsStacks, GetDatabricksTemplate(TemplateName("mlops-stack")).name) } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff0..d3e9beef3 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -29,7 +29,8 @@ "source": [ {{- if (eq .include_python "yes") }} "import sys\n", - "sys.path.append('../src')\n", + "\n", + "sys.path.append(\"../src\")\n", "from {{.project_name}} import main\n", "\n", "main.get_taxis(spark).show(10)" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl index a0852c725..e3b70c605 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl @@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not be executed directly. See README.md for how to deploy, test, and run the {{.project_name}} project. """ + from setuptools import setup, find_packages import sys -sys.path.append('./src') + +sys.path.append("./src") import datetime import {{.project_name}} +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + setup( name="{{.project_name}}", # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) 
# to ensure that changes to wheel package are picked up when used on all-purpose clusters - version={{.project_name}}.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), + version={{.project_name}}.__version__ + "+" + local_version, url="https://databricks.com", author="{{user_name}}", description="wheel file based on {{.project_name}}/src", - packages=find_packages(where='./src'), - package_dir={'': 'src'}, + packages=find_packages(where="./src"), + package_dir={"": "src"}, entry_points={ "packages": [ - "main={{.project_name}}.main:main" - ] + "main={{.project_name}}.main:main", + ], }, install_requires=[ # Dependencies in case the output wheel file is used as a library dependency. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index 253ed321c..d0286639f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -35,6 +35,7 @@ "# Import DLT and src/{{.project_name}}\n", "import dlt\n", "import sys\n", + "\n", "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", "from pyspark.sql.functions import expr\n", "from {{.project_name}} import main" @@ -63,17 +64,18 @@ {{- if (eq .include_python "yes") }} "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.get_taxis(spark)\n", {{else}} "\n", "@dlt.view\n", "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", {{end -}} "\n", + "\n", "@dlt.table\n", "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index c514c6dc5..5ae344c7e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,21 +1,25 @@ from pyspark.sql import SparkSession, DataFrame + def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") + return spark.read.table("samples.nyctaxi.trips") # Create a new Databricks Connect session. If this fails, # check that you have configured Databricks Connect correctly. # See https://docs.databricks.com/dev-tools/databricks-connect.html. 
def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + def main(): - get_taxis(get_spark()).show(5) + get_taxis(get_spark()).show(5) -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json new file mode 100644 index 000000000..00d59af5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -0,0 +1,28 @@ +{ + "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "jobs_as_code_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "include_notebook": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "order": 2 + }, + "include_python": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) Python package in '{{.project_name}}/src'", + "order": 3 + } + }, + "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." +} diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl new file mode 100644 index 000000000..7d0c88e7d --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl @@ -0,0 +1,7 @@ +{{define "latest_lts_dbr_version" -}} + 15.4.x-scala2.12 +{{- end}} + +{{define "latest_lts_db_connect_version_spec" -}} + >=15.4,<15.5 +{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl new file mode 100644 index 000000000..2f8e8ae3e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -0,0 +1,30 @@ +# Preamble + +This file only template directives; it is skipped for the actual output. 
+ +{{skip "__preamble"}} + +# TODO add DLT support, placeholder for now +{{$notDLT := true }} +{{$notNotebook := not (eq .include_notebook "yes")}} +{{$notPython := not (eq .include_python "yes")}} + +{{if $notPython}} + {{skip "{{.project_name}}/src/{{.project_name}}"}} + {{skip "{{.project_name}}/tests/main_test.py"}} +{{end}} + +{{if $notDLT}} + {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} +{{end}} + +{{if $notNotebook}} + {{skip "{{.project_name}}/src/notebook.ipynb"}} +{{end}} + +{{if (and $notDLT $notNotebook $notPython)}} + {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} +{{else}} + {{skip "{{.project_name}}/resources/.gitkeep"}} +{{end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 000000000..497ce3723 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,60 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + {{- end}} + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.py). 
The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..758ec3f16 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,51 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +{{ if .include_python -}} +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +{{ end -}} +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 000000000..ee9570302 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. 
+ +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl new file mode 100644 index 000000000..cee0d8946 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "{{.project_name}}" +requires-python = ">=3.10" +description = "wheel file based on {{.project_name}}" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +{{ if .include_python -}} +[project.entry-points.packages] +main = "{{.project_name}}.main:main" + +{{ end -}} + +[tool.setuptools] +{{ if .include_python -}} +py-modules = ["resources", "{{.project_name}}"] + +{{ else }} +py-modules = ["resources"] + +{{ end -}} +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. 
+ """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl new file mode 100644 index 000000000..7c7a0d33f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -0,0 +1,108 @@ +{{$include_dlt := "no" -}} +from databricks.bundles.jobs import Job + +""" +The main job for {{.project_name}}. + +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +This job runs {{.project_name}}_pipeline on a schedule. +{{end -}} +""" + + +{{.project_name}}_job = Job.from_dict( + { + "name": "{{.project_name}}_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + {{- if not is_service_principal}} + "email_notifications": { + "on_failure": [ + "{{user_name}}", + ], + }, + {{else}} + {{- end -}} + "tasks": [ + {{- if eq .include_notebook "yes" -}} + {{- "\n " -}} + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + {{- end -}} + {{- if (eq $include_dlt "yes") -}} + {{- "\n " -}} + { + "task_key": "refresh_pipeline", + {{- if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "pipeline_task": { + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} + "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", + }, + }, + {{- end -}} + {{- if (eq .include_python "yes") -}} + {{- "\n " -}} + { + "task_key": "main_task", + {{- if (eq $include_dlt "yes") }} + "depends_on": [ + { + "task_key": "refresh_pipeline", + }, + ], + {{- else if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "{{.project_name}}", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the {{.project_name}} package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. 
+ { + "whl": "dist/*.whl", + }, + ], + }, + {{- end -}} + {{""}} + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "{{template "latest_lts_dbr_version"}}", + "node_type_id": "{{smallest_node_type}}", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl new file mode 100644 index 000000000..c8579ae65 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -0,0 +1,24 @@ +from databricks.bundles.pipelines import Pipeline + +{{.project_name}}_pipeline = Pipeline.from_dict( + { + "name": "{{.project_name}}_pipeline", + "target": "{{.project_name}}_${bundle.target}", + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + "catalog": "catalog_name", + {{- else}} + "catalog": "{{default_catalog}}", + {{- end}} + "libraries": [ + { + "notebook": { + "path": "src/dlt_pipeline.ipynb", + }, + }, + ], + "configuration": { + "bundle.sourcePath": "${workspace.file_path}/src", + }, + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 000000000..19c9d0ebe --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the {{.project_name}} project. 
+""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl new file mode 100644 index 000000000..629106dbf --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "# Import DLT and src/{{.project_name}}\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from {{.project_name}} import main" + {{else}} + "import dlt\n", + "from pyspark.sql.functions import expr\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + {{end -}} + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + {{else}} + "@dlt.view\n", + "def taxi_raw():\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + {{end -}} + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 000000000..6782a053b --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": 
"" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl new file mode 100644 index 000000000..e69de29bb diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 000000000..6f89fca53 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,8 @@ +from {{.project_name}}.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. 
by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/libs/template/writer.go b/libs/template/writer.go new file mode 100644 index 000000000..e3d5af583 --- /dev/null +++ b/libs/template/writer.go @@ -0,0 +1,171 @@ +package template + +import ( + "context" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" +) + +const ( + libraryDirName = "library" + templateDirName = "template" + schemaFileName = "databricks_template_schema.json" +) + +type Writer interface { + // Configure the writer with: + // 1. The path to the config file (if any) that contains input values for the + // template. + // 2. The output directory where the template will be materialized. + Configure(ctx context.Context, configPath, outputDir string) error + + // Materialize the template to the local file system. + Materialize(ctx context.Context, r Reader) error +} + +type defaultWriter struct { + configPath string + outputFiler filer.Filer + + // Internal state + config *config + renderer *renderer +} + +func constructOutputFiler(ctx context.Context, outputDir string) (filer.Filer, error) { + outputDir, err := filepath.Abs(outputDir) + if err != nil { + return nil, err + } + + // If the CLI is running on DBR and we're writing to the workspace file system, + // use the extension-aware workspace filesystem filer to instantiate the template. + // + // It is not possible to write notebooks through the workspace filesystem's FUSE mount. + // Therefore this is the only way we can initialize templates that contain notebooks + // when running the CLI on DBR and initializing a template to the workspace. + // + if strings.HasPrefix(outputDir, "/Workspace/") && dbr.RunsOnRuntime(ctx) { + return filer.NewWorkspaceFilesExtensionsClient(root.WorkspaceClient(ctx), outputDir) + } + + return filer.NewLocalClient(outputDir) +} + +func (tmpl *defaultWriter) Configure(ctx context.Context, configPath, outputDir string) error { + tmpl.configPath = configPath + + outputFiler, err := constructOutputFiler(ctx, outputDir) + if err != nil { + return err + } + + tmpl.outputFiler = outputFiler + return nil +} + +func (tmpl *defaultWriter) promptForInput(ctx context.Context, reader Reader) error { + readerFs, err := reader.FS(ctx) + if err != nil { + return err + } + if _, err := fs.Stat(readerFs, schemaFileName); errors.Is(err, fs.ErrNotExist) { + return fmt.Errorf("not a bundle template: expected to find a template schema file at %s", schemaFileName) + } + + tmpl.config, err = newConfig(ctx, readerFs, schemaFileName) + if err != nil { + return err + } + + // Read and assign config values from file + if tmpl.configPath != "" { + err = tmpl.config.assignValuesFromFile(tmpl.configPath) + if err != nil { + return err + } + } + + helpers := loadHelpers(ctx) + tmpl.renderer, err = newRenderer(ctx, tmpl.config.values, helpers, readerFs, templateDirName, libraryDirName) + if err != nil { + return err + } + + // Print welcome message + welcome := tmpl.config.schema.WelcomeMessage + if welcome != "" { + welcome, err = tmpl.renderer.executeTemplate(welcome) + if err != nil { + return err + } + cmdio.LogString(ctx, welcome) + } + + // Prompt user for any missing config values. 
Assign default values if + // terminal is not TTY + err = tmpl.config.promptOrAssignDefaultValues(tmpl.renderer) + if err != nil { + return err + } + return tmpl.config.validate() +} + +func (tmpl *defaultWriter) printSuccessMessage(ctx context.Context) error { + success := tmpl.config.schema.SuccessMessage + if success == "" { + cmdio.LogString(ctx, "✨ Successfully initialized template") + return nil + } + + success, err := tmpl.renderer.executeTemplate(success) + if err != nil { + return err + } + cmdio.LogString(ctx, success) + return nil +} + +func (tmpl *defaultWriter) Materialize(ctx context.Context, reader Reader) error { + err := tmpl.promptForInput(ctx, reader) + if err != nil { + return err + } + + // Walk the template file tree and compute in-memory representations of the + // output files. + err = tmpl.renderer.walk() + if err != nil { + return err + } + + // Flush the output files to disk. + err = tmpl.renderer.persistToDisk(ctx, tmpl.outputFiler) + if err != nil { + return err + } + + return tmpl.printSuccessMessage(ctx) +} + +func (tmpl *defaultWriter) LogTelemetry(ctx context.Context) error { + // TODO, only log the template name and uuid. + return nil +} + +type writerWithFullTelemetry struct { + defaultWriter +} + +func (tmpl *writerWithFullTelemetry) LogTelemetry(ctx context.Context) error { + // TODO, log template name, uuid and enum args as well. + return nil +} diff --git a/libs/template/writer_test.go b/libs/template/writer_test.go new file mode 100644 index 000000000..9d57966ee --- /dev/null +++ b/libs/template/writer_test.go @@ -0,0 +1,58 @@ +package template + +import ( + "context" + "runtime" + "testing" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go" + workspaceConfig "github.com/databricks/databricks-sdk-go/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDefaultWriterConfigure(t *testing.T) { + // Test on local file system. + w := &defaultWriter{} + err := w.Configure(context.Background(), "/foo/bar", "/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.LocalClient{}, w.outputFiler) +} + +func TestDefaultWriterConfigureOnDBR(t *testing.T) { + // This test is not valid on windows because a DBR image is always based on + // Linux. + if runtime.GOOS == "windows" { + t.Skip("Skipping test on Windows") + } + + ctx := dbr.MockRuntime(context.Background(), true) + ctx = root.SetWorkspaceClient(ctx, &databricks.WorkspaceClient{ + Config: &workspaceConfig.Config{Host: "https://myhost.com"}, + }) + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", "/Workspace/out/abc") + assert.NoError(t, err) + + assert.Equal(t, "/foo/bar", w.configPath) + assert.IsType(t, &filer.WorkspaceFilesExtensionsClient{}, w.outputFiler) +} + +func TestMaterializeForNonTemplateDirectory(t *testing.T) { + tmpDir1 := t.TempDir() + tmpDir2 := t.TempDir() + ctx := context.Background() + + w := &defaultWriter{} + err := w.Configure(ctx, "/foo/bar", tmpDir1) + require.NoError(t, err) + + // Try to materialize a non-template directory. 
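+ // tmpDir2 was created empty above and contains no databricks_template_schema.json,
+ // so Materialize is expected to fail with the "not a bundle template" error asserted below.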
+ err = w.Materialize(ctx, &localReader{path: tmpDir2}) + assert.EqualError(t, err, "not a bundle template: expected to find a template schema file at databricks_template_schema.json") +} diff --git a/libs/testdiff/context.go b/libs/testdiff/context.go new file mode 100644 index 000000000..7b6f5ff88 --- /dev/null +++ b/libs/testdiff/context.go @@ -0,0 +1,34 @@ +package testdiff + +import ( + "context" +) + +type key int + +const ( + replacementsMapKey = key(1) +) + +func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return ctx, existingMap + } + } + + newMap := &ReplacementsContext{} + ctx = context.WithValue(ctx, replacementsMapKey, newMap) + return ctx, newMap +} + +func GetReplacementsMap(ctx context.Context) *ReplacementsContext { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return existingMap + } + } + return nil +} diff --git a/libs/testdiff/context_test.go b/libs/testdiff/context_test.go new file mode 100644 index 000000000..5a0191009 --- /dev/null +++ b/libs/testdiff/context_test.go @@ -0,0 +1,30 @@ +package testdiff + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetReplacementsMap_Nil(t *testing.T) { + ctx := context.Background() + repls := GetReplacementsMap(ctx) + assert.Nil(t, repls) +} + +func TestGetReplacementsMap_NotNil(t *testing.T) { + ctx := context.Background() + ctx, _ = WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.NotNil(t, repls) +} + +func TestWithReplacementsMap_UseExisting(t *testing.T) { + ctx := context.Background() + ctx, r1 := WithReplacementsMap(ctx) + ctx, r2 := WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.Equal(t, r1, repls) + assert.Equal(t, r2, repls) +} diff --git a/libs/testdiff/golden.go b/libs/testdiff/golden.go index 02213c88a..c1c51b6c5 100644 --- a/libs/testdiff/golden.go +++ b/libs/testdiff/golden.go @@ -3,17 +3,11 @@ package testdiff import ( "context" "flag" - "fmt" "os" - "regexp" - "slices" "strings" "testing" "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/iamutil" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) @@ -71,12 +65,6 @@ func AssertOutputJQ(t testutil.TestingT, ctx context.Context, out, outTitle, exp } } -var ( - uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) - numIdRegex = regexp.MustCompile(`[0-9]{3,}`) - privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) -) - func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string { t.Helper() out = NormalizeNewlines(out) @@ -84,138 +72,7 @@ func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string if replacements == nil { t.Fatal("WithReplacementsMap was not called") } - out = replacements.Replace(out) - out = uuidRegex.ReplaceAllString(out, "") - out = numIdRegex.ReplaceAllString(out, "") - out = privatePathRegex.ReplaceAllString(out, "/tmp/.../$3") - - return out -} - -type key int - -const ( - replacementsMapKey = key(1) -) - -type Replacement struct { - Old string - New string -} - -type ReplacementsContext struct { - Repls []Replacement -} - -func (r *ReplacementsContext) Replace(s string) string { - // QQQ Should 
probably only replace whole words - for _, repl := range r.Repls { - s = strings.ReplaceAll(s, repl.Old, repl.New) - } - return s -} - -func (r *ReplacementsContext) Set(old, new string) { - if old == "" || new == "" { - return - } - r.Repls = append(r.Repls, Replacement{Old: old, New: new}) -} - -func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return ctx, existingMap - } - } - - newMap := &ReplacementsContext{} - ctx = context.WithValue(ctx, replacementsMapKey, newMap) - return ctx, newMap -} - -func GetReplacementsMap(ctx context.Context) *ReplacementsContext { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return existingMap - } - } - return nil -} - -func PrepareReplacements(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { - t.Helper() - // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) - host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") - r.Set(host, "$DATABRICKS_HOST") - r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") - r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") - r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") - r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") - r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") - r.Set(w.Config.Token, "$DATABRICKS_TOKEN") - r.Set(w.Config.Username, "$DATABRICKS_USERNAME") - r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") - r.Set(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") - r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") - r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") - r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") - r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") - r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") - // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") - r.Set(w.Config.AzureClientID, "$USERNAME") - r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") - r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") - r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") - r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") - r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") - r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") - r.Set(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") - // This is set to words like "path" that happen too frequently - // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") -} - -func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { - t.Helper() - // There could be exact matches or overlap between different name fields, so sort them by length - // to ensure we match the largest one first and map them all to the same token - names := []string{ - u.DisplayName, - u.UserName, - iamutil.GetShortUserName(&u), - u.Name.FamilyName, - u.Name.GivenName, - } - if u.Name != nil { - names = append(names, u.Name.FamilyName) - names = append(names, u.Name.GivenName) - } - for _, val := range u.Emails { - names = append(names, val.Value) - } - stableSortReverseLength(names) - - for _, name := range names { - r.Set(name, "$USERNAME") - } - - for ind, val := range u.Groups { - r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) - } - - r.Set(u.Id, "$USER.Id") - 
- for ind, val := range u.Roles { - r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) - } -} - -func stableSortReverseLength(strs []string) { - slices.SortStableFunc(strs, func(a, b string) int { - return len(b) - len(a) - }) + return replacements.Replace(out) } func NormalizeNewlines(input string) string { diff --git a/libs/testdiff/golden_test.go b/libs/testdiff/golden_test.go deleted file mode 100644 index 0fc32be21..000000000 --- a/libs/testdiff/golden_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package testdiff - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSort(t *testing.T) { - input := []string{"a", "bc", "cd"} - stableSortReverseLength(input) - assert.Equal(t, []string{"bc", "cd", "a"}, input) -} diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go new file mode 100644 index 000000000..b512374a3 --- /dev/null +++ b/libs/testdiff/replacement.go @@ -0,0 +1,213 @@ +package testdiff + +import ( + "encoding/json" + "fmt" + "path/filepath" + "regexp" + "runtime" + "slices" + "strings" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/iam" +) + +const ( + testerName = "$USERNAME" +) + +var ( + uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) + numIdRegex = regexp.MustCompile(`[0-9]{3,}`) + privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) +) + +type Replacement struct { + Old *regexp.Regexp + New string +} + +type ReplacementsContext struct { + Repls []Replacement +} + +func (r *ReplacementsContext) Clone() ReplacementsContext { + return ReplacementsContext{Repls: slices.Clone(r.Repls)} +} + +func (r *ReplacementsContext) Replace(s string) string { + // QQQ Should probably only replace whole words + for _, repl := range r.Repls { + s = repl.Old.ReplaceAllString(s, repl.New) + } + return s +} + +func (r *ReplacementsContext) append(pattern *regexp.Regexp, replacement string) { + r.Repls = append(r.Repls, Replacement{ + Old: pattern, + New: replacement, + }) +} + +func (r *ReplacementsContext) appendLiteral(old, new string) { + r.append( + // Transform the input strings such that they can be used as literal strings in regular expressions. + regexp.MustCompile(regexp.QuoteMeta(old)), + // Transform the replacement string such that `$` is interpreted as a literal dollar sign. + // For more information about how the replacement string is used, see [regexp.Regexp.Expand]. + strings.ReplaceAll(new, `$`, `$$`), + ) +} + +func (r *ReplacementsContext) Set(old, new string) { + if old == "" || new == "" { + return + } + + // Always include both verbatim and json version of replacement. + // This helps when the string in question contains \ or other chars that need to be quoted. + // In that case we cannot rely that json(old) == '"{old}"' and need to add it explicitly. 
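+ // For example, a value containing a backslash (such as a Windows path) shows up
+ // with the backslash escaped in JSON-encoded output, so the escaped form is
+ // registered as an extra literal replacement in addition to the raw form below.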
+ + encodedNew, err := json.Marshal(new) + if err == nil { + encodedOld, err := json.Marshal(old) + if err == nil { + encodedStrNew := trimQuotes(string(encodedNew)) + encodedStrOld := trimQuotes(string(encodedOld)) + if encodedStrNew != new || encodedStrOld != old { + r.appendLiteral(encodedStrOld, encodedStrNew) + } + } + } + + r.appendLiteral(old, new) +} + +func trimQuotes(s string) string { + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + return s +} + +func (r *ReplacementsContext) SetPath(old, new string) { + if old != "" && old != "." { + // Converts C:\Users\DENIS~1.BIL -> C:\Users\denis.bilenko + oldEvalled, err1 := filepath.EvalSymlinks(old) + if err1 == nil && oldEvalled != old { + r.SetPathNoEval(oldEvalled, new) + } + } + + r.SetPathNoEval(old, new) +} + +func (r *ReplacementsContext) SetPathNoEval(old, new string) { + r.Set(old, new) + + if runtime.GOOS != "windows" { + return + } + + // Support both forward and backward slashes + m1 := strings.ReplaceAll(old, "\\", "/") + if m1 != old { + r.Set(m1, new) + } + + m2 := strings.ReplaceAll(old, "/", "\\") + if m2 != old && m2 != m1 { + r.Set(m2, new) + } +} + +func (r *ReplacementsContext) SetPathWithParents(old, new string) { + r.SetPath(old, new) + r.SetPath(filepath.Dir(old), new+"_PARENT") + r.SetPath(filepath.Dir(filepath.Dir(old)), new+"_GPARENT") +} + +func PrepareReplacementsWorkspaceClient(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { + t.Helper() + // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) + host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") + r.Set("https://"+host, "$DATABRICKS_URL") + r.Set("http://"+host, "$DATABRICKS_URL") + r.Set(host, "$DATABRICKS_HOST") + r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") + r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") + r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") + r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") + r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") + r.Set(w.Config.Token, "$DATABRICKS_TOKEN") + r.Set(w.Config.Username, "$DATABRICKS_USERNAME") + r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") + r.SetPath(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") + r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") + r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") + r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") + r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") + r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") + // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") + r.Set(w.Config.AzureClientID, testerName) + r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") + r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") + r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") + r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") + r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") + r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") + r.SetPath(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") + // This is set to words like "path" that happen too frequently + // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") +} + +func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { + t.Helper() + // There could be exact matches or overlap between different name fields, so sort them by length 
+ // to ensure we match the largest one first and map them all to the same token + + r.Set(u.UserName, testerName) + r.Set(u.DisplayName, testerName) + if u.Name != nil { + r.Set(u.Name.FamilyName, testerName) + r.Set(u.Name.GivenName, testerName) + } + + for _, val := range u.Emails { + r.Set(val.Value, testerName) + } + + r.Set(iamutil.GetShortUserName(&u), testerName) + + for ind, val := range u.Groups { + r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) + } + + r.Set(u.Id, "$USER.Id") + + for ind, val := range u.Roles { + r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) + } +} + +func PrepareReplacementsUUID(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(uuidRegex, "") +} + +func PrepareReplacementsNumber(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(numIdRegex, "") +} + +func PrepareReplacementsTemporaryDirectory(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(privatePathRegex, "/tmp/.../$3") +} diff --git a/libs/testdiff/replacement_test.go b/libs/testdiff/replacement_test.go new file mode 100644 index 000000000..de247c03e --- /dev/null +++ b/libs/testdiff/replacement_test.go @@ -0,0 +1,46 @@ +package testdiff + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReplacement_Literal(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foobar`, `[replacement]`) + assert.Equal(t, `[replacement]`, repls.Replace(`foobar`)) +} + +func TestReplacement_Encoded(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foo"bar`, `[replacement]`) + assert.Equal(t, `"[replacement]"`, repls.Replace(`"foo\"bar"`)) +} + +func TestReplacement_UUID(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsUUID(t, &repls) + + assert.Equal(t, "", repls.Replace("123e4567-e89b-12d3-a456-426614174000")) +} + +func TestReplacement_Number(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsNumber(t, &repls) + + assert.Equal(t, "12", repls.Replace("12")) + assert.Equal(t, "", repls.Replace("123")) +} + +func TestReplacement_TemporaryDirectory(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsTemporaryDirectory(t, &repls) + + assert.Equal(t, "/tmp/.../tail", repls.Replace("/tmp/foo/bar/qux/tail")) +} diff --git a/libs/testdiff/testdiff.go b/libs/testdiff/testdiff.go index fef1d5ae2..f65adf7f7 100644 --- a/libs/testdiff/testdiff.go +++ b/libs/testdiff/testdiff.go @@ -17,18 +17,20 @@ func UnifiedDiff(filename1, filename2, s1, s2 string) string { return fmt.Sprint(gotextdiff.ToUnified(filename1, filename2, s1, edits)) } -func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) { +func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) bool { t.Helper() if len(out) < 1000 && len(expected) < 1000 { // This shows full strings + diff which could be useful when debugging newlines - assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) + return assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) } else { // only show diff for large texts diff := UnifiedDiff(filename1, filename2, expected, out) if diff != "" { - t.Errorf("Diff:\n" + diff) + t.Error("Diff:\n" + diff) + return false } } + return true } func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) {
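
For context, a minimal sketch of how the relocated testdiff helpers might be wired together in a test. The test name, hostname, and sample output string are illustrative only and are not part of this change; the calls mirror the signatures added above.

package testdiff_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/libs/testdiff"
	"github.com/stretchr/testify/assert"
)

// Illustrative only: register one literal replacement plus the built-in
// temporary-directory scrubber, then normalize a sample output string the way
// acceptance tests do before diffing it against golden output.
func TestNormalizeOutputSketch(t *testing.T) {
	ctx, repls := testdiff.WithReplacementsMap(context.Background())
	repls.Set("https://myhost.com", "$DATABRICKS_URL")
	testdiff.PrepareReplacementsTemporaryDirectory(t, repls)

	out := testdiff.ReplaceOutput(t, ctx, "wrote /tmp/foo/bar/state to https://myhost.com")
	// Replacements are applied in registration order: the host literal first,
	// then the /tmp|/private path regex, which collapses the directory prefix.
	assert.Equal(t, "wrote /tmp/.../state to $DATABRICKS_URL", out)
}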