diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index 8622b29ca..dfe78790a 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -a6a317df8327c9b1e5cb59a03a42ffa2aabeef6d \ No newline at end of file +779817ed8d63031f5ea761fbd25ee84f38feec0d \ No newline at end of file diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index ee2c7b0fd..0c9fa089a 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -140,9 +140,9 @@ func new{{.PascalName}}() *cobra.Command { {{- end}} {{$method := .}} {{ if not .IsJsonOnly }} - {{range $request.Fields -}} + {{range .AllFields -}} {{- if not .Required -}} - {{if .Entity.IsObject }}// TODO: complex arg: {{.Name}} + {{if .Entity.IsObject}}{{if not (eq . $method.RequestBodyField) }}// TODO: complex arg: {{.Name}}{{end}} {{else if .Entity.IsAny }}// TODO: any: {{.Name}} {{else if .Entity.ArrayValue }}// TODO: array: {{.Name}} {{else if .Entity.MapValue }}// TODO: map via StringToStringVar: {{.Name}} diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index d998224a4..d3363b7e3 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -60,6 +60,12 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0 + - name: Run ruff + uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1 + with: + version: "0.9.1" + args: "format --check" + - name: Set go env run: | echo "GOPATH=$(go env GOPATH)" >> $GITHUB_ENV diff --git a/.gitignore b/.gitignore index edd1409ae..2060b6bac 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ dist/ *.log coverage.txt +coverage-acceptance.txt __pycache__ *.pyc @@ -31,3 +32,4 @@ __pycache__ .vscode/tasks.json .databricks +.ruff_cache diff --git a/.golangci.yaml b/.golangci.yaml index 07a6afdc5..8a83135ee 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -15,12 +15,20 @@ linters: - intrange - mirror - perfsprint + - unconvert linters-settings: govet: enable-all: true disable: - fieldalignment - shadow + settings: + printf: + funcs: + - (github.com/databricks/cli/internal/testutil.TestingT).Infof + - (github.com/databricks/cli/internal/testutil.TestingT).Errorf + - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf + - (github.com/databricks/cli/internal/testutil.TestingT).Skipf gofmt: rewrite-rules: - pattern: 'a[b:len(a)]' @@ -41,6 +49,8 @@ linters-settings: disable: # good check, but we have too many assert.(No)?Errorf? so excluding for now - require-error + copyloopvar: + check-alias: true issues: exclude-dirs-use-default: false # recommended by docs https://golangci-lint.run/usage/false-positives/ max-issues-per-linter: 1000 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b59fa540..53392e5db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,49 @@ # Version changelog +## [Release] Release v0.239.0 + +### New feature announcement + +#### Databricks Apps support + +You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration. +For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app + +#### Referencing complex variables in complex variables + +You can now reference complex variables within other complex variables. +For more details see https://github.com/databricks/cli/pull/2157 + +CLI: + * Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)). 
+ * Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)). + +Bundles: + * Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)). + * Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)). + * Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)). + * Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)). + * Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)). + * Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)). + * Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)). + * Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)). + * Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)). + * Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)). + +API Changes: + * Changed `databricks account federation-policy update` command with new required argument order. + * Changed `databricks account service-principal-federation-policy update` command with new required argument order. + +OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08) +Dependency updates: + * Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)). + * Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)). + * Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)). + * Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)). + * Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)). + * Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)). + * Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)). + ## [Release] Release v0.238.0 Bundles: diff --git a/Makefile b/Makefile index 6cfc458aa..4d1e07535 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -default: build +default: vendor fmt lint PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... . @@ -14,6 +14,7 @@ lintcheck: # formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'. # If you need to ensure that formatting & imports are always fixed, do "make fmt lint" fmt: + ruff format -q golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./... 
test: @@ -25,6 +26,17 @@ cover: showcover: go tool cover -html=coverage.txt +acc-cover: + rm -fr ./acceptance/build/cover/ + CLI_GOCOVERDIR=build/cover go test ./acceptance + rm -fr ./acceptance/build/cover-merged/ + mkdir -p acceptance/build/cover-merged/ + go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/ + go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt + +acc-showcover: + go tool cover -html=coverage-acceptance.txt + build: vendor go build -mod vendor @@ -48,4 +60,4 @@ integration: integration-short: $(INTEGRATION) -short -.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short docs +.PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover docs diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index b9fb219dc..5f1181313 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -1,12 +1,14 @@ package acceptance_test import ( + "context" "errors" "fmt" "io" "os" "os/exec" "path/filepath" + "regexp" "runtime" "slices" "sort" @@ -17,6 +19,7 @@ import ( "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/testdiff" + "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" ) @@ -38,32 +41,65 @@ func TestAccept(t *testing.T) { cwd, err := os.Getwd() require.NoError(t, err) - execPath := BuildCLI(t, cwd) + coverDir := os.Getenv("CLI_GOCOVERDIR") + + if coverDir != "" { + require.NoError(t, os.MkdirAll(coverDir, os.ModePerm)) + coverDir, err = filepath.Abs(coverDir) + require.NoError(t, err) + t.Logf("Writing coverage to %s", coverDir) + } + + execPath := BuildCLI(t, cwd, coverDir) // $CLI is what test scripts are using t.Setenv("CLI", execPath) // Make helper scripts available t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH"))) - server := StartServer(t) - AddHandlers(server) - // Redirect API access to local server: - t.Setenv("DATABRICKS_HOST", fmt.Sprintf("http://127.0.0.1:%d", server.Port)) - t.Setenv("DATABRICKS_TOKEN", "dapi1234") - - homeDir := t.TempDir() - // Do not read user's ~/.databrickscfg - t.Setenv(env.HomeEnvVar(), homeDir) - repls := testdiff.ReplacementsContext{} repls.Set(execPath, "$CLI") + tempHomeDir := t.TempDir() + repls.Set(tempHomeDir, "$TMPHOME") + t.Logf("$TMPHOME=%v", tempHomeDir) + + // Prevent CLI from downloading terraform in each test: + t.Setenv("DATABRICKS_TF_EXEC_PATH", tempHomeDir) + + ctx := context.Background() + cloudEnv := os.Getenv("CLOUD_ENV") + + if cloudEnv == "" { + server := StartServer(t) + AddHandlers(server) + // Redirect API access to local server: + t.Setenv("DATABRICKS_HOST", server.URL) + t.Setenv("DATABRICKS_TOKEN", "dapi1234") + + homeDir := t.TempDir() + // Do not read user's ~/.databrickscfg + t.Setenv(env.HomeEnvVar(), homeDir) + } + + workspaceClient, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + user, err := workspaceClient.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, &repls, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient) + testdiff.PrepareReplacementsUUID(t, &repls) + testDirs := getTests(t) require.NotEmpty(t, testDirs) + for _, dir := range testDirs { - t.Run(dir, func(t *testing.T) { + testName := 
strings.ReplaceAll(dir, "\\", "/") + t.Run(testName, func(t *testing.T) { t.Parallel() - runTest(t, dir, repls) + runTest(t, dir, coverDir, repls) }) } } @@ -88,7 +124,7 @@ func getTests(t *testing.T) []string { return testDirs } -func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { +func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) { var tmpDir string var err error if KeepTmp { @@ -111,70 +147,95 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) { args := []string{"bash", "-euo", "pipefail", EntryPointScript} cmd := exec.Command(args[0], args[1:]...) + if coverDir != "" { + // Creating individual coverage directory for each test, because writing to the same one + // results in sporadic failures like this one (only if tests are running in parallel): + // +error: coverage meta-data emit failed: writing ... rename .../tmp.covmeta.b3f... .../covmeta.b3f2c...: no such file or directory + coverDir = filepath.Join(coverDir, strings.ReplaceAll(dir, string(os.PathSeparator), "--")) + err := os.MkdirAll(coverDir, os.ModePerm) + require.NoError(t, err) + cmd.Env = append(os.Environ(), "GOCOVERDIR="+coverDir) + } + + // Write combined output to a file + out, err := os.Create(filepath.Join(tmpDir, "output.txt")) + require.NoError(t, err) + cmd.Stdout = out + cmd.Stderr = out cmd.Dir = tmpDir - outB, err := cmd.CombinedOutput() + err = cmd.Run() - out := formatOutput(string(outB), err) - out = repls.Replace(out) - doComparison(t, filepath.Join(dir, "output.txt"), "script output", out) + // Include exit code in output (if non-zero) + formatOutput(out, err) + require.NoError(t, out.Close()) - for key := range outputs { - if key == "output.txt" { - // handled above - continue - } - pathNew := filepath.Join(tmpDir, key) - newValBytes, err := os.ReadFile(pathNew) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir) - } else { - t.Errorf("%s: could not read: %s", key, err) - } - continue - } - pathExpected := filepath.Join(dir, key) - newVal := repls.Replace(string(newValBytes)) - doComparison(t, pathExpected, pathNew, newVal) + // Compare expected outputs + for relPath := range outputs { + doComparison(t, repls, dir, tmpDir, relPath) } // Make sure there are not unaccounted for new files - files, err := os.ReadDir(tmpDir) + files, err := ListDir(t, tmpDir) require.NoError(t, err) - - for _, f := range files { - name := f.Name() - if _, ok := inputs[name]; ok { + for _, relPath := range files { + if _, ok := inputs[relPath]; ok { continue } - if _, ok := outputs[name]; ok { + if _, ok := outputs[relPath]; ok { continue } - t.Errorf("Unexpected output: %s", f) - if strings.HasPrefix(name, "out") { + if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" // Show the contents & support overwrite mode for it: - pathNew := filepath.Join(tmpDir, name) - newVal := testutil.ReadFile(t, pathNew) - newVal = repls.Replace(newVal) - doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal) + doComparison(t, repls, dir, tmpDir, relPath) } } } -func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { - valueNew = testdiff.NormalizeNewlines(valueNew) - valueExpected := string(readIfExists(t, pathExpected)) - valueExpected = testdiff.NormalizeNewlines(valueExpected) - testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew) - if testdiff.OverwriteMode { - if valueNew != "" { - 
t.Logf("Overwriting: %s", pathExpected) - testutil.WriteFile(t, pathExpected, valueNew) - } else { - t.Logf("Removing: %s", pathExpected) - _ = os.Remove(pathExpected) +func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string) { + pathRef := filepath.Join(dirRef, relPath) + pathNew := filepath.Join(dirNew, relPath) + bufRef, okRef := readIfExists(t, pathRef) + bufNew, okNew := readIfExists(t, pathNew) + if !okRef && !okNew { + t.Errorf("Both files are missing: %s, %s", pathRef, pathNew) + return + } + + valueRef := testdiff.NormalizeNewlines(string(bufRef)) + valueNew := testdiff.NormalizeNewlines(string(bufNew)) + + // Apply replacements to the new value only. + // The reference value is stored after applying replacements. + valueNew = repls.Replace(valueNew) + + // The test did not produce an expected output file. + if okRef && !okNew { + t.Errorf("Missing output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Removing output file: %s", relPath) + require.NoError(t, os.Remove(pathRef)) } + return + } + + // The test produced an unexpected output file. + if !okRef && okNew { + t.Errorf("Unexpected output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Writing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + return + } + + // Compare the reference and new values. + equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if !equal && testdiff.OverwriteMode { + t.Logf("Overwriting existing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) } } @@ -182,17 +243,22 @@ func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { // Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir. func readMergedScriptContents(t *testing.T, dir string) string { scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript)) + + // Wrap script contents in a subshell such that changing the working + // directory only affects the main script and not cleanup. + scriptContents = "(\n" + scriptContents + ")\n" + prepares := []string{} cleanups := []string{} for { - x := readIfExists(t, filepath.Join(dir, CleanupScript)) - if len(x) > 0 { + x, ok := readIfExists(t, filepath.Join(dir, CleanupScript)) + if ok { cleanups = append(cleanups, string(x)) } - x = readIfExists(t, filepath.Join(dir, PrepareScript)) - if len(x) > 0 { + x, ok = readIfExists(t, filepath.Join(dir, PrepareScript)) + if ok { prepares = append(prepares, string(x)) } @@ -210,14 +276,30 @@ func readMergedScriptContents(t *testing.T, dir string) string { return strings.Join(prepares, "\n") } -func BuildCLI(t *testing.T, cwd string) string { +func BuildCLI(t *testing.T, cwd, coverDir string) string { execPath := filepath.Join(cwd, "build", "databricks") if runtime.GOOS == "windows" { execPath += ".exe" } start := time.Now() - args := []string{"go", "build", "-mod", "vendor", "-o", execPath} + args := []string{ + "go", "build", + "-mod", "vendor", + "-o", execPath, + } + + if coverDir != "" { + args = append(args, "-cover") + } + + if runtime.GOOS == "windows" { + // Get this error on my local Windows: + // error obtaining VCS status: exit status 128 + // Use -buildvcs=false to disable VCS stamping. + args = append(args, "-buildvcs=false") + } + cmd := exec.Command(args[0], args[1:]...) cmd.Dir = ".." 
out, err := cmd.CombinedOutput() @@ -252,29 +334,28 @@ func copyFile(src, dst string) error { return err } -func formatOutput(out string, err error) string { +func formatOutput(w io.Writer, err error) { if err == nil { - return out + return } if exiterr, ok := err.(*exec.ExitError); ok { exitCode := exiterr.ExitCode() - out += fmt.Sprintf("\nExit code: %d\n", exitCode) + fmt.Fprintf(w, "\nExit code: %d\n", exitCode) } else { - out += fmt.Sprintf("\nError: %s\n", err) + fmt.Fprintf(w, "\nError: %s\n", err) } - return out } -func readIfExists(t *testing.T, path string) []byte { +func readIfExists(t *testing.T, path string) ([]byte, bool) { data, err := os.ReadFile(path) if err == nil { - return data + return data, true } if !errors.Is(err, os.ErrNotExist) { t.Fatalf("%s: %s", path, err) } - return []byte{} + return []byte{}, false } func CopyDir(src, dst string, inputs, outputs map[string]bool) error { @@ -289,8 +370,10 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return err } - if strings.HasPrefix(name, "out") { - outputs[relPath] = true + if strings.HasPrefix(relPath, "out") { + if !info.IsDir() { + outputs[relPath] = true + } return nil } else { inputs[relPath] = true @@ -309,3 +392,47 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return copyFile(path, destPath) }) } + +func ListDir(t *testing.T, src string) ([]string, error) { + // exclude folders in .gitignore from comparison + ignored := []string{ + "\\.ruff_cache", + "\\.venv", + ".*\\.egg-info", + "__pycache__", + // depends on uv version + "uv.lock", + } + + var files []string + err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + for _, ignoredFolder := range ignored { + if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched { + return filepath.SkipDir + } + } + + return nil + } else { + for _, ignoredFolder := range ignored { + if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched { + return nil + } + } + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + return files, err +} diff --git a/acceptance/bin/sort_blocks.py b/acceptance/bin/sort_blocks.py index f50c6f50f..d558f252a 100755 --- a/acceptance/bin/sort_blocks.py +++ b/acceptance/bin/sort_blocks.py @@ -4,6 +4,7 @@ Helper to sort blocks in text file. A block is a set of lines separated from oth This is to workaround non-determinism in the output. """ + import sys blocks = [] @@ -11,10 +12,10 @@ blocks = [] for line in sys.stdin: if not line.strip(): if blocks and blocks[-1]: - blocks.append('') + blocks.append("") continue if not blocks: - blocks.append('') + blocks.append("") blocks[-1] += line blocks.sort() diff --git a/acceptance/bundle/help/bundle-deploy/output.txt b/acceptance/bundle/help/bundle-deploy/output.txt new file mode 100644 index 000000000..13c903f3e --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle deploy --help +Deploy bundle + +Usage: + databricks bundle deploy [flags] + +Flags: + --auto-approve Skip interactive approvals that might be required for deployment. + -c, --cluster-id string Override cluster in the deployment with the given cluster ID. + --fail-on-active-runs Fail if there are running jobs or pipelines in the deployment. + --force Force-override Git branch validation. + --force-lock Force acquisition of deployment lock. 
+ -h, --help help for deploy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-deploy/script b/acceptance/bundle/help/bundle-deploy/script new file mode 100644 index 000000000..6375cfea3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/script @@ -0,0 +1 @@ +trace $CLI bundle deploy --help diff --git a/acceptance/bundle/help/bundle-deployment/output.txt b/acceptance/bundle/help/bundle-deployment/output.txt new file mode 100644 index 000000000..ddf5b3305 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/output.txt @@ -0,0 +1,22 @@ + +>>> $CLI bundle deployment --help +Deployment related commands + +Usage: + databricks bundle deployment [command] + +Available Commands: + bind Bind bundle-defined resources to existing resources + unbind Unbind bundle-defined resources from its managed remote resource + +Flags: + -h, --help help for deployment + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle deployment [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-deployment/script b/acceptance/bundle/help/bundle-deployment/script new file mode 100644 index 000000000..ef93f7dc2 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/script @@ -0,0 +1 @@ +trace $CLI bundle deployment --help diff --git a/acceptance/bundle/help/bundle-destroy/output.txt b/acceptance/bundle/help/bundle-destroy/output.txt new file mode 100644 index 000000000..d70164301 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle destroy --help +Destroy deployed bundle resources + +Usage: + databricks bundle destroy [flags] + +Flags: + --auto-approve Skip interactive approvals for deleting resources and files + --force-lock Force acquisition of deployment lock. + -h, --help help for destroy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-destroy/script b/acceptance/bundle/help/bundle-destroy/script new file mode 100644 index 000000000..955d7b7f9 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/script @@ -0,0 +1 @@ +trace $CLI bundle destroy --help diff --git a/acceptance/bundle/help/bundle-generate-dashboard/output.txt b/acceptance/bundle/help/bundle-generate-dashboard/output.txt new file mode 100644 index 000000000..a63ce0ff8 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/output.txt @@ -0,0 +1,24 @@ + +>>> $CLI bundle generate dashboard --help +Generate configuration for a dashboard + +Usage: + databricks bundle generate dashboard [flags] + +Flags: + -s, --dashboard-dir string directory to write the dashboard representation to (default "src") + --existing-id string ID of the dashboard to generate configuration for + --existing-path string workspace path of the dashboard to generate configuration for + -f, --force force overwrite existing files in the output directory + -h, --help help for dashboard + --resource string resource key of dashboard to watch for changes + -d, --resource-dir string directory to write the configuration to (default "resources") + --watch watch for changes to the dashboard and update the configuration + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-dashboard/script b/acceptance/bundle/help/bundle-generate-dashboard/script new file mode 100644 index 000000000..320156129 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/script @@ -0,0 +1 @@ +trace $CLI bundle generate dashboard --help diff --git a/acceptance/bundle/help/bundle-generate-job/output.txt b/acceptance/bundle/help/bundle-generate-job/output.txt new file mode 100644 index 000000000..adc3f45ae --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate job --help +Generate bundle configuration for a job + +Usage: + databricks bundle generate job [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-job-id int Job ID of the job to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for job + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-job/script b/acceptance/bundle/help/bundle-generate-job/script new file mode 100644 index 000000000..109ed59aa --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/script @@ -0,0 +1 @@ +trace $CLI bundle generate job --help diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt new file mode 100644 index 000000000..cf5f70920 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate pipeline --help +Generate bundle configuration for a pipeline + +Usage: + databricks bundle generate pipeline [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-pipeline-id string ID of the pipeline to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for pipeline + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-pipeline/script b/acceptance/bundle/help/bundle-generate-pipeline/script new file mode 100644 index 000000000..c6af62d0a --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/script @@ -0,0 +1 @@ +trace $CLI bundle generate pipeline --help diff --git a/acceptance/bundle/help/bundle-generate/output.txt b/acceptance/bundle/help/bundle-generate/output.txt new file mode 100644 index 000000000..1d77dfdbd --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/output.txt @@ -0,0 +1,25 @@ + +>>> $CLI bundle generate --help +Generate bundle configuration + +Usage: + databricks bundle generate [command] + +Available Commands: + app Generate bundle configuration for a Databricks app + dashboard Generate configuration for a dashboard + job Generate bundle configuration for a job + pipeline Generate bundle configuration for a pipeline + +Flags: + -h, --help help for generate + --key string resource key to use for the generated configuration + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle generate [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-generate/script b/acceptance/bundle/help/bundle-generate/script new file mode 100644 index 000000000..932588768 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/script @@ -0,0 +1 @@ +trace $CLI bundle generate --help diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt new file mode 100644 index 000000000..bafe5a187 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -0,0 +1,31 @@ + +>>> $CLI bundle init --help +Initialize using a bundle template. + +TEMPLATE_PATH optionally specifies which template to use. 
It can be one of the following: +- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) +- a local file system path with a template directory +- a Git repository URL, e.g. https://github.com/my/repository + +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates. + +Usage: + databricks bundle init [TEMPLATE_PATH] [flags] + +Flags: + --branch string Git branch to use for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. + -h, --help help for init + --output-dir string Directory to write the initialized template to. + --tag string Git tag to use for template initialization + --template-dir string Directory path within a Git repository containing the template. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-init/script b/acceptance/bundle/help/bundle-init/script new file mode 100644 index 000000000..9bcce7559 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/script @@ -0,0 +1 @@ +trace $CLI bundle init --help diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt new file mode 100644 index 000000000..8b98aa850 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle open --help +Open a resource in the browser + +Usage: + databricks bundle open [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for open + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-open/script b/acceptance/bundle/help/bundle-open/script new file mode 100644 index 000000000..b4dfa2222 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/script @@ -0,0 +1 @@ +trace $CLI bundle open --help diff --git a/acceptance/bundle/help/bundle-run/output.txt b/acceptance/bundle/help/bundle-run/output.txt new file mode 100644 index 000000000..17763a295 --- /dev/null +++ b/acceptance/bundle/help/bundle-run/output.txt @@ -0,0 +1,57 @@ + +>>> $CLI bundle run --help +Run the job or pipeline identified by KEY. + +The KEY is the unique identifier of the resource to run. In addition to +customizing the run using any of the available flags, you can also specify +keyword or positional arguments as shown in these examples: + + databricks bundle run my_job -- --key1 value1 --key2 value2 + +Or: + + databricks bundle run my_job -- value1 value2 value3 + +If the specified job uses job parameters or the job has a notebook task with +parameters, the first example applies and flag names are mapped to the +parameter names. 
+ +If the specified job does not use job parameters and the job has a Python file +task or a Python wheel task, the second example applies. + +Usage: + databricks bundle run [flags] KEY + +Job Flags: + --params stringToString comma separated k=v pairs for job parameters (default []) + +Job Task Flags: + Note: please prefer use of job-level parameters (--param) over task-level parameters. + For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task + --dbt-commands strings A list of commands to execute for jobs with DBT tasks. + --jar-params strings A list of parameters for jobs with Spark JAR tasks. + --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) + --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) + --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) + --python-params strings A list of parameters for jobs with Python tasks. + --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. + --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) + +Pipeline Flags: + --full-refresh strings List of tables to reset and recompute. + --full-refresh-all Perform a full graph reset and recompute. + --refresh strings List of tables to update. + --refresh-all Perform a full graph update. + --validate-only Perform an update to validate graph correctness. + +Flags: + -h, --help help for run + --no-wait Don't wait for the run to complete. + --restart Restart the run if it is already running. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-run/script b/acceptance/bundle/help/bundle-run/script new file mode 100644 index 000000000..edcf1786a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/script @@ -0,0 +1 @@ +trace $CLI bundle run --help diff --git a/acceptance/bundle/help/bundle-schema/output.txt b/acceptance/bundle/help/bundle-schema/output.txt new file mode 100644 index 000000000..8f2983f5b --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle schema --help +Generate JSON Schema for bundle configuration + +Usage: + databricks bundle schema [flags] + +Flags: + -h, --help help for schema + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-schema/script b/acceptance/bundle/help/bundle-schema/script new file mode 100644 index 000000000..a084fec8e --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/script @@ -0,0 +1 @@ +trace $CLI bundle schema --help diff --git a/acceptance/bundle/help/bundle-summary/output.txt b/acceptance/bundle/help/bundle-summary/output.txt new file mode 100644 index 000000000..935c4bdc5 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle summary --help +Summarize resources deployed by this bundle + +Usage: + databricks bundle summary [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for summary + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-summary/script b/acceptance/bundle/help/bundle-summary/script new file mode 100644 index 000000000..967279d86 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/script @@ -0,0 +1 @@ +trace $CLI bundle summary --help diff --git a/acceptance/bundle/help/bundle-sync/output.txt b/acceptance/bundle/help/bundle-sync/output.txt new file mode 100644 index 000000000..6588e6978 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/output.txt @@ -0,0 +1,19 @@ + +>>> $CLI bundle sync --help +Synchronize bundle tree to the workspace + +Usage: + databricks bundle sync [flags] + +Flags: + --full perform full synchronization (default is incremental) + -h, --help help for sync + --interval duration file system polling interval (for --watch) (default 1s) + --output type type of the output format + --watch watch local file system for changes + +Global Flags: + --debug enable debug logging + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-sync/script b/acceptance/bundle/help/bundle-sync/script new file mode 100644 index 000000000..fe1d6c7e3 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/script @@ -0,0 +1 @@ +trace $CLI bundle sync --help diff --git a/acceptance/bundle/help/bundle-validate/output.txt b/acceptance/bundle/help/bundle-validate/output.txt new file mode 100644 index 000000000..a0c350faf --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate --help +Validate configuration + +Usage: + databricks bundle validate [flags] + +Flags: + -h, --help help for validate + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. 
Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-validate/script b/acceptance/bundle/help/bundle-validate/script new file mode 100644 index 000000000..8b8434b2d --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/script @@ -0,0 +1 @@ +trace $CLI bundle validate --help diff --git a/acceptance/bundle/help/bundle/output.txt b/acceptance/bundle/help/bundle/output.txt new file mode 100644 index 000000000..e0e2ea47c --- /dev/null +++ b/acceptance/bundle/help/bundle/output.txt @@ -0,0 +1,33 @@ + +>>> $CLI bundle --help +Databricks Asset Bundles let you express data/AI/analytics projects as code. + +Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html + +Usage: + databricks bundle [command] + +Available Commands: + deploy Deploy bundle + deployment Deployment related commands + destroy Destroy deployed bundle resources + generate Generate bundle configuration + init Initialize using a bundle template + open Open a resource in the browser + run Run a job or pipeline update + schema Generate JSON Schema for bundle configuration + summary Summarize resources deployed by this bundle + sync Synchronize bundle tree to the workspace + validate Validate configuration + +Flags: + -h, --help help for bundle + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks bundle [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle/script b/acceptance/bundle/help/bundle/script new file mode 100644 index 000000000..eac116817 --- /dev/null +++ b/acceptance/bundle/help/bundle/script @@ -0,0 +1 @@ +trace $CLI bundle --help diff --git a/acceptance/bundle/override/job_cluster/output.txt b/acceptance/bundle/override/job_cluster/output.txt index 947d19032..ff6e8316e 100644 --- a/acceptance/bundle/override/job_cluster/output.txt +++ b/acceptance/bundle/override/job_cluster/output.txt @@ -4,7 +4,7 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -32,7 +32,7 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/override/job_cluster_var/output.txt b/acceptance/bundle/override/job_cluster_var/output.txt index cb76de5a8..0b19e5eb2 100644 --- a/acceptance/bundle/override/job_cluster_var/output.txt +++ b/acceptance/bundle/override/job_cluster_var/output.txt @@ -4,7 +4,7 @@ "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -31,8 +31,8 @@ Name: override_job_cluster Target: development Workspace: - 
User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/development Validation OK! @@ -41,7 +41,7 @@ Validation OK! "foo": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", @@ -68,7 +68,7 @@ Validation OK! Name: override_job_cluster Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging Validation OK! diff --git a/acceptance/bundle/override/job_tasks/output.txt b/acceptance/bundle/override/job_tasks/output.txt index 0bb0b1812..915351d4e 100644 --- a/acceptance/bundle/override/job_tasks/output.txt +++ b/acceptance/bundle/override/job_tasks/output.txt @@ -69,8 +69,8 @@ Error: file ./test1.py not found Name: override_job_tasks Target: staging Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_tasks/staging + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/override_job_tasks/staging Found 1 error diff --git a/acceptance/bundle/override/merge-string-map/output.txt b/acceptance/bundle/override/merge-string-map/output.txt index 986da8174..b566aa07f 100644 --- a/acceptance/bundle/override/merge-string-map/output.txt +++ b/acceptance/bundle/override/merge-string-map/output.txt @@ -21,7 +21,7 @@ Warning: expected map, found string Name: merge-string-map Target: dev Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/merge-string-map/dev + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/merge-string-map/dev Found 1 warning diff --git a/acceptance/bundle/override/pipeline_cluster/output.txt b/acceptance/bundle/override/pipeline_cluster/output.txt index 81bf58180..8babed0ec 100644 --- a/acceptance/bundle/override/pipeline_cluster/output.txt +++ b/acceptance/bundle/override/pipeline_cluster/output.txt @@ -14,7 +14,7 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/development/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/development/state/metadata.json" }, "name": "job", "permissions": [] @@ -36,7 +36,7 @@ ], "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/staging/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/staging/state/metadata.json" }, "name": "job", "permissions": [] diff --git a/bundle/tests/path_translation/fallback/README.md b/acceptance/bundle/paths/fallback/README.md similarity index 100% rename from bundle/tests/path_translation/fallback/README.md rename to acceptance/bundle/paths/fallback/README.md diff --git a/bundle/tests/path_translation/nominal/databricks.yml b/acceptance/bundle/paths/fallback/databricks.yml similarity index 80% rename from bundle/tests/path_translation/nominal/databricks.yml rename to acceptance/bundle/paths/fallback/databricks.yml index cd425920d..c6d0abe0a 100644 --- 
a/bundle/tests/path_translation/nominal/databricks.yml +++ b/acceptance/bundle/paths/fallback/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_nominal + name: fallback include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/fallback/output.job.json b/acceptance/bundle/paths/fallback/output.job.json new file mode 100644 index 000000000..fe9e1cf3d --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.job.json @@ -0,0 +1,67 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + } +] diff --git a/acceptance/bundle/paths/fallback/output.pipeline.json b/acceptance/bundle/paths/fallback/output.pipeline.json new file mode 100644 index 000000000..38521cb22 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt new file mode 100644 index 000000000..f694610d2 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle validate -t development -o json + +Exit code: 0 + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: fallback +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/fallback/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/fallback/override_job.yml b/acceptance/bundle/paths/fallback/override_job.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_job.yml rename to acceptance/bundle/paths/fallback/override_job.yml diff --git a/bundle/tests/path_translation/fallback/override_pipeline.yml b/acceptance/bundle/paths/fallback/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_pipeline.yml rename to acceptance/bundle/paths/fallback/override_pipeline.yml diff --git a/bundle/tests/path_translation/fallback/resources/my_job.yml b/acceptance/bundle/paths/fallback/resources/my_job.yml similarity index 71% rename from bundle/tests/path_translation/fallback/resources/my_job.yml rename to acceptance/bundle/paths/fallback/resources/my_job.yml index 4907df4f0..921ee412b 100644 --- a/bundle/tests/path_translation/fallback/resources/my_job.yml +++ b/acceptance/bundle/paths/fallback/resources/my_job.yml @@ -4,33 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/fallback/resources/my_pipeline.yml b/acceptance/bundle/paths/fallback/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/resources/my_pipeline.yml rename to acceptance/bundle/paths/fallback/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/fallback/script b/acceptance/bundle/paths/fallback/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/fallback/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/fallback/script.cleanup b/acceptance/bundle/paths/fallback/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/fallback/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep b/acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep 
similarity index 100% rename from bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/fallback/src/file.py b/acceptance/bundle/paths/fallback/src/file.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file.py rename to acceptance/bundle/paths/fallback/src/file.py diff --git a/bundle/tests/path_translation/fallback/src/file1.py b/acceptance/bundle/paths/fallback/src/file1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file1.py rename to acceptance/bundle/paths/fallback/src/file1.py diff --git a/bundle/tests/path_translation/fallback/src/file2.py b/acceptance/bundle/paths/fallback/src/file2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file2.py rename to acceptance/bundle/paths/fallback/src/file2.py diff --git a/bundle/tests/path_translation/fallback/src/notebook.py b/acceptance/bundle/paths/fallback/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook.py rename to acceptance/bundle/paths/fallback/src/notebook.py diff --git a/bundle/tests/path_translation/fallback/src/notebook1.py b/acceptance/bundle/paths/fallback/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook1.py rename to acceptance/bundle/paths/fallback/src/notebook1.py diff --git a/bundle/tests/path_translation/fallback/src/notebook2.py b/acceptance/bundle/paths/fallback/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook2.py rename to acceptance/bundle/paths/fallback/src/notebook2.py diff --git a/bundle/tests/path_translation/fallback/src/sql.sql b/acceptance/bundle/paths/fallback/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/fallback/src/sql.sql rename to acceptance/bundle/paths/fallback/src/sql.sql diff --git a/bundle/tests/path_translation/nominal/README.md b/acceptance/bundle/paths/nominal/README.md similarity index 100% rename from bundle/tests/path_translation/nominal/README.md rename to acceptance/bundle/paths/nominal/README.md diff --git a/bundle/tests/path_translation/fallback/databricks.yml b/acceptance/bundle/paths/nominal/databricks.yml similarity index 79% rename from bundle/tests/path_translation/fallback/databricks.yml rename to acceptance/bundle/paths/nominal/databricks.yml index 92be3f921..5d3c22f91 100644 --- a/bundle/tests/path_translation/fallback/databricks.yml +++ b/acceptance/bundle/paths/nominal/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_fallback + name: nominal include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/nominal/output.job.json b/acceptance/bundle/paths/nominal/output.job.json new file mode 100644 index 000000000..9e1cb4d90 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.job.json @@ -0,0 +1,89 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/dbt_project" + }, + 
"job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + }, + { + "for_each_task": { + "task": { + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + } + } + }, + "job_cluster_key": "default", + "task_key": "for_each_notebook_example" + }, + { + "for_each_task": { + "task": { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + } + } + }, + "task_key": "for_each_spark_python_example" + } +] diff --git a/acceptance/bundle/paths/nominal/output.pipeline.json b/acceptance/bundle/paths/nominal/output.pipeline.json new file mode 100644 index 000000000..277b0c4a1 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt new file mode 100644 index 000000000..189170335 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle validate -t development -o json + +Exit code: 0 + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. 
Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: nominal +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/nominal/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/nominal/override_job.yml b/acceptance/bundle/paths/nominal/override_job.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_job.yml rename to acceptance/bundle/paths/nominal/override_job.yml diff --git a/bundle/tests/path_translation/nominal/override_pipeline.yml b/acceptance/bundle/paths/nominal/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_pipeline.yml rename to acceptance/bundle/paths/nominal/override_pipeline.yml diff --git a/bundle/tests/path_translation/nominal/resources/my_job.yml b/acceptance/bundle/paths/nominal/resources/my_job.yml similarity index 74% rename from bundle/tests/path_translation/nominal/resources/my_job.yml rename to acceptance/bundle/paths/nominal/resources/my_job.yml index 2020c9dc8..13996a20c 100644 --- a/bundle/tests/path_translation/nominal/resources/my_job.yml +++ b/acceptance/bundle/paths/nominal/resources/my_job.yml @@ -4,38 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar - task_key: for_each_notebook_example + job_cluster_key: default for_each_task: task: notebook_task: @@ -44,5 +51,12 @@ resources: - task_key: for_each_spark_python_example for_each_task: task: + job_cluster_key: default spark_python_task: python_file: "this value is overridden" + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/nominal/resources/my_pipeline.yml b/acceptance/bundle/paths/nominal/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/resources/my_pipeline.yml rename to acceptance/bundle/paths/nominal/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/nominal/script b/acceptance/bundle/paths/nominal/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/nominal/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/nominal/script.cleanup b/acceptance/bundle/paths/nominal/script.cleanup new file mode 100644 index 
000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/nominal/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep b/acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/nominal/src/file.py b/acceptance/bundle/paths/nominal/src/file.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file.py rename to acceptance/bundle/paths/nominal/src/file.py diff --git a/bundle/tests/path_translation/nominal/src/file1.py b/acceptance/bundle/paths/nominal/src/file1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file1.py rename to acceptance/bundle/paths/nominal/src/file1.py diff --git a/bundle/tests/path_translation/nominal/src/file2.py b/acceptance/bundle/paths/nominal/src/file2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file2.py rename to acceptance/bundle/paths/nominal/src/file2.py diff --git a/bundle/tests/path_translation/nominal/src/notebook.py b/acceptance/bundle/paths/nominal/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook.py rename to acceptance/bundle/paths/nominal/src/notebook.py diff --git a/bundle/tests/path_translation/nominal/src/notebook1.py b/acceptance/bundle/paths/nominal/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook1.py rename to acceptance/bundle/paths/nominal/src/notebook1.py diff --git a/bundle/tests/path_translation/nominal/src/notebook2.py b/acceptance/bundle/paths/nominal/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook2.py rename to acceptance/bundle/paths/nominal/src/notebook2.py diff --git a/bundle/tests/path_translation/nominal/src/sql.sql b/acceptance/bundle/paths/nominal/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/nominal/src/sql.sql rename to acceptance/bundle/paths/nominal/src/sql.sql diff --git a/bundle/tests/relative_path_translation/databricks.yml b/acceptance/bundle/paths/relative_path_translation/databricks.yml similarity index 100% rename from bundle/tests/relative_path_translation/databricks.yml rename to acceptance/bundle/paths/relative_path_translation/databricks.yml diff --git a/acceptance/bundle/paths/relative_path_translation/output.default.json b/acceptance/bundle/paths/relative_path_translation/output.default.json new file mode 100644 index 000000000..e2514b392 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.default.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file1.py", + "/Workspace/remote/src/file1.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.override.json b/acceptance/bundle/paths/relative_path_translation/output.override.json new file mode 100644 index 000000000..729d2eaa0 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.override.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file2.py", + "/Workspace/remote/src/file2.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.txt b/acceptance/bundle/paths/relative_path_translation/output.txt new file mode 100644 index 000000000..362f2ec7b --- /dev/null +++ 
b/acceptance/bundle/paths/relative_path_translation/output.txt @@ -0,0 +1,4 @@ + +>>> $CLI bundle validate -t default -o json + +>>> $CLI bundle validate -t override -o json diff --git a/bundle/tests/relative_path_translation/resources/job.yml b/acceptance/bundle/paths/relative_path_translation/resources/job.yml similarity index 66% rename from bundle/tests/relative_path_translation/resources/job.yml rename to acceptance/bundle/paths/relative_path_translation/resources/job.yml index 93f121f25..9540ff1ad 100644 --- a/bundle/tests/relative_path_translation/resources/job.yml +++ b/acceptance/bundle/paths/relative_path_translation/resources/job.yml @@ -3,12 +3,20 @@ resources: job: tasks: - task_key: local + job_cluster_key: default spark_python_task: python_file: ../src/file1.py - task_key: variable_reference + job_cluster_key: default spark_python_task: # Note: this is a pure variable reference yet needs to persist the location # of the reference, not the location of the variable value. # Also see https://github.com/databricks/cli/issues/1330. python_file: ${var.file_path} + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/acceptance/bundle/paths/relative_path_translation/script b/acceptance/bundle/paths/relative_path_translation/script new file mode 100644 index 000000000..252e9a07f --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -t default -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json +trace $CLI bundle validate -t override -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json diff --git a/bundle/tests/relative_path_translation/src/file1.py b/acceptance/bundle/paths/relative_path_translation/src/file1.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file1.py rename to acceptance/bundle/paths/relative_path_translation/src/file1.py diff --git a/bundle/tests/relative_path_translation/src/file2.py b/acceptance/bundle/paths/relative_path_translation/src/file2.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file2.py rename to acceptance/bundle/paths/relative_path_translation/src/file2.py diff --git a/acceptance/bundle/templates/dbt-sql/input.json b/acceptance/bundle/templates/dbt-sql/input.json new file mode 100644 index 000000000..201ac9667 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_dbt_sql", + "http_path": "/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/dbt-sql/output.txt b/acceptance/bundle/templates/dbt-sql/output.txt new file mode 100644 index 000000000..972c7e152 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +Welcome to the dbt template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +📊 Your new project has been created in the 'my_dbt_sql' directory! 
+If you already have dbt installed, just type 'cd my_dbt_sql; dbt init' to get started. +Refer to the README.md file for full "getting started" guide and production setup instructions. + + +>>> $CLI bundle validate -t dev +Name: my_dbt_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_dbt_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod + +Validation OK! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json new file mode 100644 index 000000000..28fe943fd --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "redhat.vscode-yaml", + "innoverio.vscode-dbt-power-user", + ] +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json new file mode 100644 index 000000000..e8dcd1a83 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json @@ -0,0 +1,32 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md new file mode 100644 index 000000000..756a2eda4 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -0,0 +1,138 @@ +# my_dbt_sql + +The 'my_dbt_sql' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). + +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. + + Run these instructions from the `my_dbt_sql` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To set up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found?
You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. + +## Manually deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. + +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project. +For example, the default template would deploy a job called +`[dev yourname] my_dbt_sql_job` to your workspace. +You can find that job by opening your workspace and clicking on **Workflows**. + +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml. +When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml +has a schedule set that runs every day. The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml new file mode 100644 index 000000000..1962bc543 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -0,0 +1,34 @@ +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_dbt_sql + uuid: + +include: + - resources/*.yml + +# Deployment targets. +# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml +targets: + dev: + default: true + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default.
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml new file mode 100644 index 000000000..fdaf30dda --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml @@ -0,0 +1,38 @@ + +# This file defines dbt profiles for deployed dbt jobs. +my_dbt_sql: + target: dev # default target + outputs: + + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: + type: databricks + method: http + catalog: main + schema: "{{ var('dev_schema') }}" + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'dev' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" + + # The production target when deployed with the Databricks CLI + prod: + type: databricks + method: http + catalog: main + schema: default + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'prod' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml new file mode 100644 index 000000000..4218640d8 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml @@ -0,0 +1,32 @@ +name: 'my_dbt_sql' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'my_dbt_sql' + +# These configurations specify where dbt should look for different types of files. +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. +model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{ config(...) }}` macro. 
+models: + my_dbt_sql: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml new file mode 100644 index 000000000..5e0f0fc29 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml @@ -0,0 +1,23 @@ +# This file defines prompts with defaults for dbt initialization. +# It is used when the `dbt init` command is invoked. +# +fixed: + type: databricks +prompts: + host: + default: $DATABRICKS_HOST + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + http_path: + hint: 'HTTP path of SQL warehouse to use' + default: /sql/2.0/warehouses/f00dcafe + catalog: + hint: 'initial catalog' + default: main + schema: + hint: 'personal schema where dbt will build objects during development, example: $USERNAME' + threads: + hint: 'threads to use during development, 1 or more' + type: 'int' + default: 4 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt new file mode 100644 index 000000000..e6b861203 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt @@ -0,0 +1,3 @@ +## requirements-dev.txt: dependencies for local development. + +dbt-databricks>=1.8.0,<2.0.0 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml new file mode 100644 index 000000000..d52f8ed50 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml @@ -0,0 +1,43 @@ +resources: + jobs: + my_dbt_sql_job: + name: my_dbt_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + + tasks: + - task_key: dbt + + dbt_task: + project_directory: ../ + # The default schema, catalog, etc.
are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ + commands: + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql new file mode 100644 index 000000000..e32736ceb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql @@ -0,0 +1,17 @@ + +-- This model file defines a materialized view called 'orders_daily' +-- +-- Read more about materialized views at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'materialized_view') }} + +select order_date, count(*) AS number_of_orders + +from {{ ref('orders_raw') }} + +-- During development, only process a smaller range of data +{% if target.name != 'prod' %} +where order_date >= '2019-08-01' and order_date < '2019-09-01' +{% endif %} + +group by order_date diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql new file mode 100644 index 000000000..8faf8f38b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql @@ -0,0 +1,16 @@ +-- This model file defines a streaming table called 'orders_raw' +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
+{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml new file mode 100644 index 000000000..c64f1bfce --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: orders_raw + description: "Raw ingested orders" + columns: + - name: customer_name + description: "The name of a customer" + data_tests: + - unique + - not_null + + - name: orders_daily + description: "Number of orders by day" + columns: + - name: order_date + description: "The date on which orders took place" + data_tests: + - unique + - not_null diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script new file mode 100644 index 000000000..c4ca817fe --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +cd output/my_dbt_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-python/input.json b/acceptance/bundle/templates/default-python/input.json new file mode 100644 index 000000000..3e1d79c68 --- /dev/null +++ b/acceptance/bundle/templates/default-python/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_python", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/default-python/output.txt b/acceptance/bundle/templates/default-python/output.txt new file mode 100644 index 000000000..5493ac2cf --- /dev/null +++ b/acceptance/bundle/templates/default-python/output.txt @@ -0,0 +1,30 @@ + +>>> $CLI bundle init default-python --config-file ./input.json --output-dir output + +Welcome to the default Python template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_python' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_python +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/dev + +Validation OK! 
+ +>>> $CLI bundle validate -t prod +Name: my_default_python +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_python/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/README.md new file mode 100644 index 000000000..97d7d7949 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/README.md @@ -0,0 +1,47 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. 
+ For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +5. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +7. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml new file mode 100644 index 000000000..9deca9cf5 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml @@ -0,0 +1,31 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files.
+ +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..e6148a4ad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,49 @@ +# The main job for my_default_python. 
+resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main + libraries: + # By default we just include the .whl file generated for the my_default_python package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..f9e083f4f --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,13 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + catalog: main + target: my_default_python_${bundle.target} + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..3b2fef4b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py new file mode 100644 index 000000000..84b24ecb8 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="$USERNAME", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. 
+ # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..36e993af7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. 
+def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..0d560443b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/default-python/script b/acceptance/bundle/templates/default-python/script new file mode 100644 index 000000000..b11a7ea21 --- /dev/null +++ b/acceptance/bundle/templates/default-python/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-python --config-file ./input.json --output-dir output + +cd output/my_default_python +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-sql/.ruff.toml b/acceptance/bundle/templates/default-sql/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/acceptance/bundle/templates/default-sql/input.json b/acceptance/bundle/templates/default-sql/input.json new file mode 100644 index 000000000..c728d25de --- /dev/null +++ b/acceptance/bundle/templates/default-sql/input.json @@ -0,0 +1,6 @@ +{ + "project_name": "my_default_sql", + "http_path": 
"/sql/2.0/warehouses/f00dcafe", + "default_catalog": "main", + "personal_schemas": "yes, automatically use a schema based on the current user name during development" +} diff --git a/acceptance/bundle/templates/default-sql/output.txt b/acceptance/bundle/templates/default-sql/output.txt new file mode 100644 index 000000000..fe0139093 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output.txt @@ -0,0 +1,32 @@ + +>>> $CLI bundle init default-sql --config-file ./input.json --output-dir output + +Welcome to the default SQL template for Databricks Asset Bundles! + +A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html. +workspace_host: $DATABRICKS_URL + +✨ Your new project has been created in the 'my_default_sql' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev +Name: my_default_sql +Target: dev +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/dev + +Validation OK! + +>>> $CLI bundle validate -t prod +Name: my_default_sql +Target: prod +Workspace: + Host: $DATABRICKS_URL + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/my_default_sql/prod + +Validation OK! diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json new file mode 100644 index 000000000..8e1023465 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json new file mode 100644 index 000000000..c641abe39 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "main", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md new file mode 100644 index 000000000..67ded153f --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -0,0 +1,41 @@ +# my_default_sql + +The 'my_default_sql' project was generated by using the default-sql template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html + +2. Authenticate to your Databricks workspace (if you have not done so already): + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_sql_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. To run a job, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + +7. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml new file mode 100644 index 000000000..ab857287e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_default_sql. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_sql + uuid: + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
+ mode: development + default: true + workspace: + host: $DATABRICKS_URL + variables: + warehouse_id: f00dcafe + catalog: main + schema: ${workspace.current_user.short_name} + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + variables: + warehouse_id: f00dcafe + catalog: main + schema: default + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml new file mode 100644 index 000000000..86de0f9db --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml @@ -0,0 +1,38 @@ +# A job running SQL queries on a SQL warehouse +resources: + jobs: + my_default_sql_sql_job: + name: my_default_sql_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + - name: bundle_target + default: ${bundle.target} + + tasks: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_raw.sql + + - task_key: orders_daily + depends_on: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_daily.sql diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md new file mode 100644 index 000000000..5350d09cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks and SQL files. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
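The dev and prod targets above assign the warehouse_id, catalog, and schema variables per target, and the job passes them on as the catalog/schema/bundle_target job parameters. A quick way to see how those assignments resolve, and to try a one-off override, is the validate command used throughout these tests (a sketch only; the --var flag and jq filter mirror the variable acceptance scripts later in this diff, and my_catalog is a hypothetical value):

```
cd output/my_default_sql

# Show the resolved variables for each target.
databricks bundle validate -t dev -o json | jq .variables
databricks bundle validate -t prod -o json | jq .variables

# Override a single variable for a one-off check (my_catalog is a placeholder).
databricks bundle validate -t prod --var catalog=my_catalog -o json | jq .variables
```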
diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb new file mode 100644 index 000000000..c3fd072e5 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "exploration", + "widgets": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql new file mode 100644 index 000000000..ea7b80b54 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -0,0 +1,21 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REPLACE MATERIALIZED VIEW + orders_daily +AS SELECT + order_date, count(*) AS number_of_orders +FROM + orders_raw + +WHERE if( + {{bundle_target}} = "prod", + true, + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01' +) + +GROUP BY order_date diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql new file mode 100644 index 000000000..79b1354cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -0,0 +1,19 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REFRESH STREAMING TABLE + orders_raw +AS SELECT + customer_name, + DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date, + order_number +FROM STREAM READ_FILES( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script new file mode 100644 index 000000000..66e7a14a2 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-sql --config-file ./input.json --output-dir output + +cd output/my_default_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json new file mode 100644 index 000000000..748076c75 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -0,0 +1,5 @@ +{ + "project_name": 
"my_jobs_as_code", + "include_notebook": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt new file mode 100644 index 000000000..1aa8a94d5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -0,0 +1,85 @@ + +>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_jobs_as_code' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev --output json +{ + "jobs": { + "my_jobs_as_code_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "email_notifications": { + "on_failure": [ + "$USERNAME" + ] + }, + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 4, + "min_workers": 1 + }, + "node_type_id": "i3.xlarge", + "spark_version": "15.4.x-scala2.12" + } + } + ], + "max_concurrent_runs": 4, + "name": "[dev $USERNAME] my_jobs_as_code_job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": { + "dev": "$USERNAME" + }, + "tasks": [ + { + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "depends_on": [ + { + "task_key": "notebook_task" + } + ], + "job_cluster_key": "job_cluster", + "libraries": [ + { + "whl": "dist/*.whl" + } + ], + "python_wheel_task": { + "entry_point": "main", + "package_name": "my_jobs_as_code" + }, + "task_key": "main_task" + } + ], + "trigger": { + "pause_status": "PAUSED", + "periodic": { + "interval": 1, + "unit": "DAYS" + } + } + } + } +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md new file mode 100644 index 000000000..8c429c6e5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -0,0 +1,58 @@ +# my_jobs_as_code + +The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. 
Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_jobs_as_code_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_jobs_as_code_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml new file mode 100644 index 000000000..fd87aa381 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_jobs_as_code. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_jobs_as_code + uuid: + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. 
+ root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml new file mode 100644 index 000000000..28240e3ec --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_jobs_as_code" +requires-python = ">=3.10" +description = "wheel file based on my_jobs_as_code" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +[project.entry-points.packages] +main = "my_jobs_as_code.main:main" + +[tool.setuptools] +py-modules = ["resources", "my_jobs_as_code"] + +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. 
+ # "databricks-connect>=15.4,<15.5", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py new file mode 100644 index 000000000..4854d656f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py @@ -0,0 +1,67 @@ +from databricks.bundles.jobs import Job + +""" +The main job for my_jobs_as_code. +""" + + +my_jobs_as_code_job = Job.from_dict( + { + "name": "my_jobs_as_code_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + "email_notifications": { + "on_failure": [ + "$USERNAME", + ], + }, + "tasks": [ + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + { + "task_key": "main_task", + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "my_jobs_as_code", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the my_jobs_as_code package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "15.4.x-scala2.12", + "node_type_id": "i3.xlarge", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py new file mode 100644 index 000000000..ba284ba82 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_jobs_as_code project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb new file mode 100644 index 000000000..9bc3f1560 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_jobs_as_code import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py new file mode 100644 index 000000000..13e100ee2 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py @@ -0,0 +1,8 @@ +from my_jobs_as_code.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script new file mode 100644 index 000000000..2209aa7ab --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -0,0 +1,12 @@ +trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +cd output/my_jobs_as_code + +# silence uv output because it's non-deterministic +uv sync 2> /dev/null + +# remove version constraint because it always creates a warning on dev builds +cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new +mv databricks.yml.new databricks.yml + +trace $CLI bundle validate -t dev --output json | jq ".resources" diff --git a/acceptance/bundle/variables/arg-repeat/databricks.yml b/acceptance/bundle/variables/arg-repeat/databricks.yml new file mode 100644 index 000000000..377c6cfab --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: arg-repeat + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/arg-repeat/output.txt b/acceptance/bundle/variables/arg-repeat/output.txt new file mode 100644 index 000000000..48bd2033f --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/output.txt @@ -0,0 +1,20 @@ + +>>> errcode $CLI bundle validate --var a=one -o json + +Exit code: 0 +{ + "a": { + "default": "hello", + "value": "one" + } +} + +>>> errcode $CLI bundle validate --var a=one --var a=two +Error: failed to assign two to a: variable has already been assigned value: one + +Name: arg-repeat +Target: default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/arg-repeat/script b/acceptance/bundle/variables/arg-repeat/script new file mode 100644 index 000000000..3e03dbcb1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate --var a=one -o json | 
jq .variables +trace errcode $CLI bundle validate --var a=one --var a=two diff --git a/acceptance/bundle/variables/complex-cross-ref/databricks.yml b/acceptance/bundle/variables/complex-cross-ref/databricks.yml new file mode 100644 index 000000000..4459f44df --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: complex-cross-ref + +variables: + a: + default: + a_1: 500 + a_2: ${var.b.b_2} + b: + default: + b_1: ${var.a.a_1} + b_2: 2.5 diff --git a/acceptance/bundle/variables/complex-cross-ref/output.txt b/acceptance/bundle/variables/complex-cross-ref/output.txt new file mode 100644 index 000000000..f1b624d29 --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/output.txt @@ -0,0 +1,22 @@ +{ + "a": { + "default": { + "a_1": 500, + "a_2": 2.5 + }, + "value": { + "a_1": 500, + "a_2": 2.5 + } + }, + "b": { + "default": { + "b_1": 500, + "b_2": 2.5 + }, + "value": { + "b_1": 500, + "b_2": 2.5 + } + } +} diff --git a/acceptance/bundle/variables/complex-cross-ref/script b/acceptance/bundle/variables/complex-cross-ref/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/complex-cycle-self/databricks.yml b/acceptance/bundle/variables/complex-cycle-self/databricks.yml new file mode 100644 index 000000000..bb461795c --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle-self/output.txt b/acceptance/bundle/variables/complex-cycle-self/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle-self/script b/acceptance/bundle/variables/complex-cycle-self/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-cycle/databricks.yml b/acceptance/bundle/variables/complex-cycle/databricks.yml new file mode 100644 index 000000000..9784a4e25 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.b} + b: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle/output.txt b/acceptance/bundle/variables/complex-cycle/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle/script b/acceptance/bundle/variables/complex-cycle/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git 
a/acceptance/bundle/variables/complex-simple/databricks.yml b/acceptance/bundle/variables/complex-simple/databricks.yml new file mode 100644 index 000000000..135ff86cf --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/databricks.yml @@ -0,0 +1,27 @@ +# This example works and properly merges resources.jobs.job1.job_clusters.new_cluster and ${var.cluster}. +# retaining num_workers, spark_version and overriding node_type_id. +bundle: + name: TestResolveComplexVariable + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + +resources: + jobs: + job1: + job_clusters: + - new_cluster: + node_type_id: "random" + spark_version: 13.3.x-scala2.12 + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-simple/output.txt b/acceptance/bundle/variables/complex-simple/output.txt new file mode 100644 index 000000000..16b0ec80f --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/output.txt @@ -0,0 +1,10 @@ +[ + { + "job_cluster_key": "", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_version": "13.3.x-scala2.12" + } + } +] diff --git a/acceptance/bundle/variables/complex-simple/script b/acceptance/bundle/variables/complex-simple/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-simple/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex-transitive-deep/databricks.yml b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml new file mode 100644 index 000000000..1357c291a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: complex-transitive + +variables: + catalog: + default: hive_metastore + spark_conf_1: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog} + spark_conf: + default: ${var.spark_conf_1} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deep/output.txt b/acceptance/bundle/variables/complex-transitive-deep/output.txt new file mode 100644 index 000000000..29c41cda5 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/output.txt @@ -0,0 +1,3 @@ +{ + "spark.databricks.sql.initial.catalog.name": "hive_metastore" +} diff --git a/acceptance/bundle/variables/complex-transitive-deep/script b/acceptance/bundle/variables/complex-transitive-deep/script new file mode 100644 index 000000000..52bb08ed4 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/script @@ -0,0 +1,2 @@ +# Currently, this incorrectly outputs variable reference instead of resolved value +$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf' diff --git a/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml new file mode 100644 index 000000000..3f9bea464 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/databricks.yml @@ -0,0 +1,22 @@ +bundle: + name: complex-transitive-deeper + +variables: + catalog_1: + default: + name: hive_metastore + catalog: + default: ${var.catalog_1} + spark_conf: + default: + 
"spark.databricks.sql.initial.catalog.name": ${var.catalog.name} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deeper/output.txt b/acceptance/bundle/variables/complex-transitive-deeper/output.txt new file mode 100644 index 000000000..3bedbfb9a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/output.txt @@ -0,0 +1,7 @@ +Error: expected a map to index "variables.catalog.value.name", found string + +{ + "my_cluster": "${var.etl_cluster_config}" +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/complex-transitive-deeper/script b/acceptance/bundle/variables/complex-transitive-deeper/script new file mode 100644 index 000000000..d4fb404b1 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deeper/script @@ -0,0 +1,2 @@ +# Currently, this errors instead of interpolating variables +$CLI bundle validate -o json | jq '.resources.clusters' diff --git a/acceptance/bundle/variables/complex-transitive/output.txt b/acceptance/bundle/variables/complex-transitive/output.txt index a031e0497..29c41cda5 100644 --- a/acceptance/bundle/variables/complex-transitive/output.txt +++ b/acceptance/bundle/variables/complex-transitive/output.txt @@ -1,3 +1,3 @@ { - "spark.databricks.sql.initial.catalog.name": "${var.catalog}" + "spark.databricks.sql.initial.catalog.name": "hive_metastore" } diff --git a/acceptance/bundle/variables/complex-with-var-reference/databricks.yml b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml new file mode 100644 index 000000000..104f9a470 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/databricks.yml @@ -0,0 +1,17 @@ +bundle: + name: TestResolveComplexVariableWithVarReference + +variables: + package_version: + default: "1.0.0" + cluster_libraries: + type: "complex" + default: + - pypi: + package: "cicd_template==${var.package_version}" + +resources: + jobs: + job1: + tasks: + - libraries: ${var.cluster_libraries} diff --git a/acceptance/bundle/variables/complex-with-var-reference/output.txt b/acceptance/bundle/variables/complex-with-var-reference/output.txt new file mode 100644 index 000000000..a5b792ac4 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/output.txt @@ -0,0 +1,12 @@ +[ + { + "libraries": [ + { + "pypi": { + "package": "cicd_template==1.0.0" + } + } + ], + "task_key": "" + } +] diff --git a/acceptance/bundle/variables/complex-with-var-reference/script b/acceptance/bundle/variables/complex-with-var-reference/script new file mode 100644 index 000000000..0f7353ad1 --- /dev/null +++ b/acceptance/bundle/variables/complex-with-var-reference/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.tasks diff --git a/acceptance/bundle/variables/complex-within-complex/databricks.yml b/acceptance/bundle/variables/complex-within-complex/databricks.yml new file mode 100644 index 000000000..f1d77289e --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/databricks.yml @@ -0,0 +1,34 @@ +# Does not work currently, explicitly disabled, even though it works if you remove 'type: "complex"' lines +# Also fails to merge clusters. 
+bundle: + name: TestResolveComplexVariableReferencesWithComplexVariablesError + +variables: + cluster: + type: "complex" + value: + node_type_id: "Standard_DS3_v2" + num_workers: 2 + spark_conf: "${var.spark_conf}" + spark_conf: + type: "complex" + value: + spark.executor.memory: "4g" + spark.executor.cores: "2" + +resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: + node_type_id: "random" + +targets: + dev: + resources: + jobs: + job1: + job_clusters: + - job_cluster_key: my_cluster + new_cluster: ${var.cluster} diff --git a/acceptance/bundle/variables/complex-within-complex/output.txt b/acceptance/bundle/variables/complex-within-complex/output.txt new file mode 100644 index 000000000..72e6ef69a --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/output.txt @@ -0,0 +1,17 @@ +Warning: unknown field: node_type_id + at resources.jobs.job1.job_clusters[0] + in databricks.yml:25:11 + +[ + { + "job_cluster_key": "my_cluster", + "new_cluster": { + "node_type_id": "Standard_DS3_v2", + "num_workers": 2, + "spark_conf": { + "spark.executor.cores": "2", + "spark.executor.memory": "4g" + } + } + } +] diff --git a/acceptance/bundle/variables/complex-within-complex/script b/acceptance/bundle/variables/complex-within-complex/script new file mode 100644 index 000000000..1c31d0b40 --- /dev/null +++ b/acceptance/bundle/variables/complex-within-complex/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .resources.jobs.job1.job_clusters diff --git a/acceptance/bundle/variables/complex/out.default.json b/acceptance/bundle/variables/complex/out.default.json index 6454562a6..a1ccd52bc 100644 --- a/acceptance/bundle/variables/complex/out.default.json +++ b/acceptance/bundle/variables/complex/out.default.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/default/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/default/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex/out.dev.json b/acceptance/bundle/variables/complex/out.dev.json index cede5feb2..bb939091b 100644 --- a/acceptance/bundle/variables/complex/out.dev.json +++ b/acceptance/bundle/variables/complex/out.dev.json @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/complex_multiple_files/output.txt b/acceptance/bundle/variables/complex_multiple_files/output.txt index e87b8df11..ec2cad1ce 100644 --- a/acceptance/bundle/variables/complex_multiple_files/output.txt +++ b/acceptance/bundle/variables/complex_multiple_files/output.txt @@ -4,7 +4,7 @@ "my_job": { "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables-multiple-files/dev/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/complex-variables-multiple-files/dev/state/metadata.json" }, "edit_mode": "UI_LOCKED", "format": "MULTI_TASK", diff --git a/acceptance/bundle/variables/cycle/databricks.yml b/acceptance/bundle/variables/cycle/databricks.yml new file mode 100644 index 000000000..b35196671 --- 
/dev/null +++ b/acceptance/bundle/variables/cycle/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: cycle + +variables: + a: + default: ${var.b} + b: + default: ${var.a} diff --git a/acceptance/bundle/variables/cycle/output.txt b/acceptance/bundle/variables/cycle/output.txt new file mode 100644 index 000000000..ea9c95cd4 --- /dev/null +++ b/acceptance/bundle/variables/cycle/output.txt @@ -0,0 +1,14 @@ +Error: cycle detected in field resolution: variables.a.default -> var.b -> var.a -> var.b + +{ + "a": { + "default": "${var.b}", + "value": "${var.b}" + }, + "b": { + "default": "${var.a}", + "value": "${var.a}" + } +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/cycle/script b/acceptance/bundle/variables/cycle/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/cycle/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/empty/output.txt b/acceptance/bundle/variables/empty/output.txt index c3f0af130..261635920 100644 --- a/acceptance/bundle/variables/empty/output.txt +++ b/acceptance/bundle/variables/empty/output.txt @@ -3,8 +3,8 @@ Error: no value assigned to required variable a. Assignment can be done through Name: empty${var.a} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/empty${var.a}/default + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/empty${var.a}/default Found 1 error diff --git a/acceptance/bundle/variables/env_overrides/output.txt b/acceptance/bundle/variables/env_overrides/output.txt index e8fb99938..f42f82211 100644 --- a/acceptance/bundle/variables/env_overrides/output.txt +++ b/acceptance/bundle/variables/env_overrides/output.txt @@ -14,8 +14,8 @@ Error: no value assigned to required variable b. Assignment can be done through Name: test bundle Target: env-missing-a-required-variable-assignment Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/test bundle/env-missing-a-required-variable-assignment + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/test bundle/env-missing-a-required-variable-assignment Found 1 error diff --git a/acceptance/bundle/variables/git-branch/databricks.yml b/acceptance/bundle/variables/git-branch/databricks.yml new file mode 100644 index 000000000..7cf210722 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/databricks.yml @@ -0,0 +1,19 @@ +bundle: + name: git + git: + # This is currently not supported + branch: ${var.deployment_branch} + +variables: + deployment_branch: + # By setting deployment_branch to "" we set bundle.git.branch to "", which is the same as unsetting it. + # This should make the CLI read the branch from git and update bundle.git.branch accordingly. It should + # also set bundle.git.inferred to true. + default: "" + +targets: + prod: + default: true + dev: + variables: + deployment_branch: dev-branch diff --git a/acceptance/bundle/variables/git-branch/output.txt b/acceptance/bundle/variables/git-branch/output.txt new file mode 100644 index 000000000..d6d824394 --- /dev/null +++ b/acceptance/bundle/variables/git-branch/output.txt @@ -0,0 +1,98 @@ + +>>> $CLI bundle validate -o json +{ + "bundle": { + "environment": "prod", + "git": { + "actual_branch": "main", + "branch": "", + "bundle_root_path": ".", + }, + "name": "git", + "target": "prod", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "."
+ ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "", + "value": "" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/prod", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/prod/state" + } +} + +>>> $CLI bundle validate +Name: git +Target: prod +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/prod + +Validation OK! + +>>> $CLI bundle validate -o json -t dev +{ + "bundle": { + "environment": "dev", + "git": { + "actual_branch": "main", + "branch": "dev-branch", + "bundle_root_path": ".", + }, + "name": "git", + "target": "dev", + "terraform": { + "exec_path": "$TMPHOME" + } + }, + "sync": { + "paths": [ + "." + ] + }, + "targets": null, + "variables": { + "deployment_branch": { + "default": "dev-branch", + "value": "dev-branch" + } + }, + "workspace": { + "artifact_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/files", + "resource_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/resources", + "root_path": "/Workspace/Users/$USERNAME/.bundle/git/dev", + "state_path": "/Workspace/Users/$USERNAME/.bundle/git/dev/state" + } +} + +>>> $CLI bundle validate -t dev +Name: git +Target: dev +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/git/dev + +Validation OK! diff --git a/acceptance/bundle/variables/git-branch/script b/acceptance/bundle/variables/git-branch/script new file mode 100644 index 000000000..aed881f1f --- /dev/null +++ b/acceptance/bundle/variables/git-branch/script @@ -0,0 +1,6 @@ +git-repo-init +trace $CLI bundle validate -o json | grep -v '"commit"' +trace $CLI bundle validate +trace $CLI bundle validate -o json -t dev | grep -v '"commit"' +trace $CLI bundle validate -t dev | grep -v '"commit"' +rm -fr .git diff --git a/acceptance/bundle/variables/resolve-builtin/output.txt b/acceptance/bundle/variables/resolve-builtin/output.txt index 2f58abd8a..f060c472e 100644 --- a/acceptance/bundle/variables/resolve-builtin/output.txt +++ b/acceptance/bundle/variables/resolve-builtin/output.txt @@ -1,8 +1,8 @@ { "artifact_path": "TestResolveVariableReferences/bar/artifacts", "current_user": { - "short_name": "tester", - "userName": "tester@databricks.com" + "short_name": "$USERNAME", + "userName": "$USERNAME" }, "file_path": "TestResolveVariableReferences/bar/baz", "resource_path": "TestResolveVariableReferences/bar/resources", diff --git a/acceptance/bundle/variables/resolve-nonstrings/databricks.yml b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml new file mode 100644 index 000000000..a02c78a7e --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/databricks.yml @@ -0,0 +1,23 @@ +bundle: + name: TestResolveVariableReferencesForPrimitiveNonStringFields + +variables: + no_alert_for_canceled_runs: {} + no_alert_for_skipped_runs: {} + min_workers: {} + max_workers: {} + spot_bid_max_price: {} + +resources: + jobs: + job1: + notification_settings: + no_alert_for_canceled_runs: ${var.no_alert_for_canceled_runs} + no_alert_for_skipped_runs: ${var.no_alert_for_skipped_runs} + tasks: + - new_cluster: + 
autoscale: + min_workers: ${var.min_workers} + max_workers: ${var.max_workers} + azure_attributes: + spot_bid_max_price: ${var.spot_bid_max_price} diff --git a/acceptance/bundle/variables/resolve-nonstrings/output.txt b/acceptance/bundle/variables/resolve-nonstrings/output.txt new file mode 100644 index 000000000..3a1eb9c47 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/output.txt @@ -0,0 +1,52 @@ +{ + "variables": { + "max_workers": { + "value": "2" + }, + "min_workers": { + "value": "1" + }, + "no_alert_for_canceled_runs": { + "value": "true" + }, + "no_alert_for_skipped_runs": { + "value": "false" + }, + "spot_bid_max_price": { + "value": "0.5" + } + }, + "jobs": { + "job1": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/TestResolveVariableReferencesForPrimitiveNonStringFields/default/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "format": "MULTI_TASK", + "notification_settings": { + "no_alert_for_canceled_runs": true, + "no_alert_for_skipped_runs": false + }, + "permissions": [], + "queue": { + "enabled": true + }, + "tags": {}, + "tasks": [ + { + "new_cluster": { + "autoscale": { + "max_workers": 2, + "min_workers": 1 + }, + "azure_attributes": { + "spot_bid_max_price": 0.5 + } + }, + "task_key": "" + } + ] + } + } +} diff --git a/acceptance/bundle/variables/resolve-nonstrings/script b/acceptance/bundle/variables/resolve-nonstrings/script new file mode 100644 index 000000000..cb9e45b61 --- /dev/null +++ b/acceptance/bundle/variables/resolve-nonstrings/script @@ -0,0 +1,4 @@ +export BUNDLE_VAR_no_alert_for_skipped_runs=false +export BUNDLE_VAR_max_workers=2 +export BUNDLE_VAR_min_workers=3 # shadowed by --var below +$CLI bundle validate -o json --var no_alert_for_canceled_runs=true --var min_workers=1 --var spot_bid_max_price=0.5 | jq '{ variables, jobs: .resources.jobs }' diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml new file mode 100644 index 000000000..6a45de330 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/databricks.yml @@ -0,0 +1,9 @@ +bundle: + name: TestResolveVariableReferencesToBundleVariables + +workspace: + root_path: "${bundle.name}/${var.foo}" + +variables: + foo: + value: "bar" diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt new file mode 100644 index 000000000..c56fbe415 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/output.txt @@ -0,0 +1,11 @@ +{ + "artifact_path": "TestResolveVariableReferencesToBundleVariables/bar/artifacts", + "current_user": { + "short_name": "$USERNAME", + "userName": "$USERNAME" + }, + "file_path": "TestResolveVariableReferencesToBundleVariables/bar/files", + "resource_path": "TestResolveVariableReferencesToBundleVariables/bar/resources", + "root_path": "TestResolveVariableReferencesToBundleVariables/bar", + "state_path": "TestResolveVariableReferencesToBundleVariables/bar/state" +} diff --git a/acceptance/bundle/variables/resolve-vars-in-root-path/script b/acceptance/bundle/variables/resolve-vars-in-root-path/script new file mode 100644 index 000000000..fefd9abe6 --- /dev/null +++ b/acceptance/bundle/variables/resolve-vars-in-root-path/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .workspace diff --git a/acceptance/bundle/variables/vanilla/output.txt b/acceptance/bundle/variables/vanilla/output.txt 
index 69b358a3f..1d88bd060 100644 --- a/acceptance/bundle/variables/vanilla/output.txt +++ b/acceptance/bundle/variables/vanilla/output.txt @@ -8,8 +8,8 @@ Error: no value assigned to required variable b. Assignment can be done through Name: ${var.a} ${var.b} Target: default Workspace: - User: tester@databricks.com - Path: /Workspace/Users/tester@databricks.com/.bundle/${var.a} ${var.b}/default + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/${var.a} ${var.b}/default Found 1 error diff --git a/acceptance/bundle/variables/variable_overrides_in_target/output.txt b/acceptance/bundle/variables/variable_overrides_in_target/output.txt index de193f5b6..8998b691d 100644 --- a/acceptance/bundle/variables/variable_overrides_in_target/output.txt +++ b/acceptance/bundle/variables/variable_overrides_in_target/output.txt @@ -12,7 +12,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/use-default-variable-values/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/use-default-variable-values/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -33,7 +33,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-string-variable/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-string-variable/state/metadata.json" }, "name": "overridden_string", "permissions": [] @@ -54,7 +54,7 @@ "continuous": true, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-int-variable/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-int-variable/state/metadata.json" }, "name": "a_string", "permissions": [] @@ -75,7 +75,7 @@ "continuous": false, "deployment": { "kind": "BUNDLE", - "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json" }, "name": "overridden_string", "permissions": [] diff --git a/acceptance/script.prepare b/acceptance/script.prepare index 3f1bb2acc..61061b59e 100644 --- a/acceptance/script.prepare +++ b/acceptance/script.prepare @@ -1,6 +1,3 @@ -# Prevent CLI from downloading terraform in each test: -export DATABRICKS_TF_EXEC_PATH=/tmp/ - errcode() { # Temporarily disable 'set -e' to prevent the script from exiting on error set +e @@ -34,3 +31,12 @@ trace() { return $? 
} + +git-repo-init() { + git init -qb main + git config --global core.autocrlf false + git config user.name "Tester" + git config user.email "tester@databricks.com" + git add databricks.yml + git commit -qm 'Add databricks.yml' +} diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 7b21e198f..0d10fbea1 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -2,11 +2,11 @@ package acceptance_test import ( "encoding/json" - "net" "net/http" "net/http/httptest" "testing" + "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/workspace" @@ -14,8 +14,7 @@ import ( type TestServer struct { *httptest.Server - Mux *http.ServeMux - Port int + Mux *http.ServeMux } type HandlerFunc func(r *http.Request) (any, error) @@ -23,12 +22,10 @@ type HandlerFunc func(r *http.Request) (any, error) func NewTestServer() *TestServer { mux := http.NewServeMux() server := httptest.NewServer(mux) - port := server.Listener.Addr().(*net.TCPAddr).Port return &TestServer{ Server: server, Mux: mux, - Port: port, } } @@ -126,4 +123,27 @@ func AddHandlers(server *TestServer) { ResourceId: "1001", }, nil }) + + server.Handle("/api/2.1/unity-catalog/current-metastore-assignment", func(r *http.Request) (any, error) { + return catalog.MetastoreAssignment{ + DefaultCatalogName: "main", + }, nil + }) + + server.Handle("/api/2.0/permissions/directories/1001", func(r *http.Request) (any, error) { + return workspace.WorkspaceObjectPermissions{ + ObjectId: "1001", + ObjectType: "DIRECTORY", + AccessControlList: []workspace.WorkspaceObjectAccessControlResponse{ + { + UserName: "tester@databricks.com", + AllPermissions: []workspace.WorkspaceObjectPermission{ + { + PermissionLevel: "CAN_MANAGE", + }, + }, + }, + }, + }, nil + }) +} diff --git a/bundle/apps/interpolate_variables_test.go b/bundle/apps/interpolate_variables_test.go index a2909006f..b6c424a95 100644 --- a/bundle/apps/interpolate_variables_test.go +++ b/bundle/apps/interpolate_variables_test.go @@ -44,6 +44,6 @@ func TestAppInterpolateVariables(t *testing.T) { diags := bundle.Apply(context.Background(), b, InterpolateVariables()) require.Empty(t, diags) - require.Equal(t, []any([]any{map[string]any{"name": "JOB_ID", "value": "123"}}), b.Config.Resources.Apps["my_app_1"].Config["env"]) + require.Equal(t, []any{map[string]any{"name": "JOB_ID", "value": "123"}}, b.Config.Resources.Apps["my_app_1"].Config["env"]) require.Nil(t, b.Config.Resources.Apps["my_app_2"].Config) } diff --git a/bundle/apps/slow_deploy_message.go b/bundle/apps/slow_deploy_message.go new file mode 100644 index 000000000..87275980a --- /dev/null +++ b/bundle/apps/slow_deploy_message.go @@ -0,0 +1,29 @@ +package apps + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" +) + +type slowDeployMessage struct{} + +// TODO: needs to be removed when no_compute option becomes available in TF provider and used in DABs +// See https://github.com/databricks/cli/pull/2144 +func (v *slowDeployMessage) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if len(b.Config.Resources.Apps) > 0 { + cmdio.LogString(ctx, "Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning.") + } + + return nil +} + +func (v *slowDeployMessage) Name() string { + return
"apps.SlowDeployMessage" +} + +func SlowDeployMessage() bundle.Mutator { + return &slowDeployMessage{} +} diff --git a/bundle/config/mutator/capture_schema_dependency.go b/bundle/config/mutator/capture_schema_dependency.go new file mode 100644 index 000000000..5025c9a0d --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency.go @@ -0,0 +1,100 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" +) + +type captureSchemaDependency struct{} + +// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines +// or UC Volumes using the `${resources.schemas..name}` syntax. Using this +// syntax allows TF to capture the deploy time dependency this DLT pipeline or UC Volume +// has on the schema and deploy changes to the schema before deploying the pipeline or volume. +// +// This mutator translates any implicit schema references in DLT pipelines or UC Volumes +// to the explicit syntax. +func CaptureSchemaDependency() bundle.Mutator { + return &captureSchemaDependency{} +} + +func (m *captureSchemaDependency) Name() string { + return "CaptureSchemaDependency" +} + +func schemaNameRef(key string) string { + return fmt.Sprintf("${resources.schemas.%s.name}", key) +} + +func findSchema(b *bundle.Bundle, catalogName, schemaName string) (string, *resources.Schema) { + if catalogName == "" || schemaName == "" { + return "", nil + } + + for k, s := range b.Config.Resources.Schemas { + if s != nil && s.CreateSchema != nil && s.CatalogName == catalogName && s.Name == schemaName { + return k, s + } + } + return "", nil +} + +func resolveVolume(v *resources.Volume, b *bundle.Bundle) { + if v == nil || v.CreateVolumeRequestContent == nil { + return + } + schemaK, schema := findSchema(b, v.CatalogName, v.SchemaName) + if schema == nil { + return + } + + v.SchemaName = schemaNameRef(schemaK) +} + +func resolvePipelineSchema(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Schema == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Schema) + if schema == nil { + return + } + + p.Schema = schemaNameRef(schemaK) +} + +func resolvePipelineTarget(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Target == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Target) + if schema == nil { + return + } + p.Target = schemaNameRef(schemaK) +} + +func (m *captureSchemaDependency) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, p := range b.Config.Resources.Pipelines { + // "schema" and "target" have the same semantics in the DLT API but are mutually + // exclusive i.e. only one can be set at a time. If schema is set, the pipeline + // is in direct publishing mode and can write tables to multiple schemas + // (vs target which is limited to a single schema). 
+ resolvePipelineTarget(p, b) + resolvePipelineSchema(p, b) + } + for _, v := range b.Config.Resources.Volumes { + resolveVolume(v, b) + } + return nil +} diff --git a/bundle/config/mutator/capture_schema_dependency_test.go b/bundle/config/mutator/capture_schema_dependency_test.go new file mode 100644 index 000000000..0a94e7748 --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency_test.go @@ -0,0 +1,277 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCaptureSchemaDependencyForVolume(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Volumes: map[string]*resources.Volume{ + "volume1": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "foobar", + }, + }, + "volume2": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog2", + SchemaName: "foobar", + }, + }, + "volume3": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "barfoo", + }, + }, + "volume4": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogX", + SchemaName: "foobar", + }, + }, + "volume5": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "schemaX", + }, + }, + "nilVolume": nil, + "emptyVolume": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Volumes["volume1"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Volumes["volume2"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Volumes["volume3"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "foobar", b.Config.Resources.Volumes["volume4"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "schemaX", b.Config.Resources.Volumes["volume5"].CreateVolumeRequestContent.SchemaName) + + assert.Nil(t, b.Config.Resources.Volumes["nilVolume"]) + assert.Nil(t, b.Config.Resources.Volumes["emptyVolume"].CreateVolumeRequestContent) +} + +func TestCaptureSchemaDependencyForPipelinesWithTarget(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + 
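CaptureSchemaDependency only rewrites a volume's or pipeline's schema field when both the catalog and the schema name match a schema resource defined in the same bundle; anything else is left as the literal value. A standalone sketch of that lookup-and-rewrite step, using plain structs and maps in place of the bundle's config types (which are assumptions here):

package main

import "fmt"

// schemaDef mirrors the parts of a bundle schema resource the lookup needs.
type schemaDef struct {
	CatalogName string
	Name        string
}

// findSchemaKey returns the resource key of a schema matching catalog+name, or "".
func findSchemaKey(schemas map[string]schemaDef, catalog, name string) string {
	if catalog == "" || name == "" {
		return ""
	}
	for key, s := range schemas {
		if s.CatalogName == catalog && s.Name == name {
			return key
		}
	}
	return ""
}

func main() {
	schemas := map[string]schemaDef{
		"schema1": {CatalogName: "catalog1", Name: "foobar"},
	}

	// Implicit reference: rewritten to the interpolation syntax so Terraform
	// captures the dependency on the schema resource.
	if key := findSchemaKey(schemas, "catalog1", "foobar"); key != "" {
		fmt.Printf("${resources.schemas.%s.name}\n", key)
	}

	// No matching schema in the bundle: the literal value stays as-is.
	if key := findSchemaKey(schemas, "catalogX", "foobar"); key == "" {
		fmt.Println("foobar")
	}
}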
}, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Schema: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Schema: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "", + Name: "whatever", + }, + }, + "nilPipeline": nil, + "emptyPipeline": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Schema) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Schema) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Schema) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Schema) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Schema) + + assert.Nil(t, b.Config.Resources.Pipelines["nilPipeline"]) + assert.Nil(t, b.Config.Resources.Pipelines["emptyPipeline"].PipelineSpec) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Target) + } +} + +func TestCaptureSchemaDependencyForPipelinesWithSchema(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Target: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Target: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "", + Name: "whatever", + }, + }, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + assert.Equal(t, "${resources.schemas.schema1.name}", 
b.Config.Resources.Pipelines["pipeline1"].Target) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Target) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Target) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Target) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Target) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Schema) + } +} diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 7ad3dfd8d..9aa93791f 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -3,6 +3,7 @@ package mutator import ( "context" "errors" + "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -13,15 +14,37 @@ import ( "github.com/databricks/cli/libs/dyn/dynvar" ) +/* +For pathological cases, output and time grow exponentially. + +On my laptop, timings for acceptance/bundle/variables/complex-cycle: +rounds time + + 9 0.10s + 10 0.13s + 11 0.27s + 12 0.68s + 13 1.98s + 14 6.28s + 15 21.70s + 16 78.16s +*/ +const maxResolutionRounds = 11 + type resolveVariableReferences struct { - prefixes []string - pattern dyn.Pattern - lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) - skipFn func(dyn.Value) bool + prefixes []string + pattern dyn.Pattern + lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) + skipFn func(dyn.Value) bool + extraRounds int } func ResolveVariableReferences(prefixes ...string) bundle.Mutator { - return &resolveVariableReferences{prefixes: prefixes, lookupFn: lookup} + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + } } func ResolveVariableReferencesInLookup() bundle.Mutator { @@ -32,19 +55,6 @@ func ResolveVariableReferencesInLookup() bundle.Mutator { }, pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("lookup")), lookupFn: lookupForVariables} } -func ResolveVariableReferencesInComplexVariables() bundle.Mutator { - return &resolveVariableReferences{ - prefixes: []string{ - "bundle", - "workspace", - "variables", - }, - pattern: dyn.NewPattern(dyn.Key("variables"), dyn.AnyKey(), dyn.Key("value")), - lookupFn: lookupForComplexVariables, - skipFn: skipResolvingInNonComplexVariables, - } -} - func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) { if path.String() == "workspace.file_path" { @@ -57,38 +67,6 @@ func lookup(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { return dyn.GetByPath(v, path) } -func lookupForComplexVariables(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { - if path[0].Key() != "variables" { - return lookup(v, path, b) - } - - varV, err := dyn.GetByPath(v, path[:len(path)-1]) - if err != nil { - return dyn.InvalidValue, err - } - - var vv variable.Variable - err = convert.ToTyped(&vv, varV) - if err != nil { - return dyn.InvalidValue, err - } - - if vv.Type == variable.VariableTypeComplex { - return dyn.InvalidValue, 
errors.New("complex variables cannot contain references to another complex variables") - } - - return lookup(v, path, b) -} - -func skipResolvingInNonComplexVariables(v dyn.Value) bool { - switch v.Kind() { - case dyn.KindMap, dyn.KindSequence: - return false - default: - return true - } -} - func lookupForVariables(v dyn.Value, path dyn.Path, b *bundle.Bundle) (dyn.Value, error) { if path[0].Key() != "variables" { return lookup(v, path, b) @@ -131,7 +109,36 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) varPath := dyn.NewPath(dyn.Key("var")) var diags diag.Diagnostics + maxRounds := 1 + m.extraRounds + for round := range maxRounds { + hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + + diags = diags.Extend(newDiags) + + if diags.HasError() { + break + } + + if !hasUpdates { + break + } + + if round >= maxRounds-1 { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("Detected unresolved variables after %d resolution rounds", round+1), + // Would be nice to include names of the variables there, but that would complicate things more + }) + break + } + } + return diags +} + +func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn.Path, varPath dyn.Path) (bool, diag.Diagnostics) { + var diags diag.Diagnostics + hasUpdates := false err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -174,6 +181,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if m.skipFn != nil && m.skipFn(v) { return dyn.InvalidValue, dynvar.ErrSkipResolution } + hasUpdates = true return m.lookupFn(normalized, path, b) } } @@ -194,5 +202,6 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if err != nil { diags = diags.Extend(diag.FromErr(err)) } - return diags + + return hasUpdates, diags } diff --git a/bundle/config/mutator/resolve_variable_references_test.go b/bundle/config/mutator/resolve_variable_references_test.go index 18bb022aa..44f6c8dbb 100644 --- a/bundle/config/mutator/resolve_variable_references_test.go +++ b/bundle/config/mutator/resolve_variable_references_test.go @@ -7,321 +7,10 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/bundle/config/variable" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/pipelines" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func TestResolveVariableReferencesToBundleVariables(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Workspace: config.Workspace{ - RootPath: "${bundle.name}/${var.foo}", - }, - Variables: map[string]*variable.Variable{ - "foo": { - Value: "bar", - }, - }, - }, - } - - // Apply with a valid prefix. This should change the workspace root path. 
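With the single-pass ResolveVariableReferencesInComplexVariables mutator removed, Apply now resolves references iteratively: each round substitutes whatever is currently resolvable, and the loop stops when a pass makes no updates or the round cap is reached, at which point a warning is emitted. A standalone sketch of the same fixed-point loop over plain strings, with made-up variable names, showing why nested references need multiple rounds and why a cap guards against cycles:

package main

import (
	"fmt"
	"regexp"
)

const maxRounds = 11

var ref = regexp.MustCompile(`\$\{var\.([a-zA-Z_]+)\}`)

// resolveOnce substitutes every currently resolvable ${var.<name>} reference
// and reports whether anything changed.
func resolveOnce(vars map[string]string) bool {
	updated := false
	for key, val := range vars {
		vars[key] = ref.ReplaceAllStringFunc(val, func(m string) string {
			name := ref.FindStringSubmatch(m)[1]
			if repl, ok := vars[name]; ok && !ref.MatchString(repl) {
				updated = true
				return repl
			}
			return m
		})
	}
	return updated
}

func main() {
	// "b" refers to "a" and "c" refers to "b", so resolution may need more than one pass.
	vars := map[string]string{
		"a": "1.0.0",
		"b": "pkg==${var.a}",
		"c": "dep: ${var.b}",
	}

	for round := 0; round < maxRounds; round++ {
		if !resolveOnce(vars) {
			break
		}
		if round == maxRounds-1 {
			fmt.Printf("warning: still resolving after %d rounds (possible cycle)\n", round+1)
		}
	}
	fmt.Println(vars["c"]) // dep: pkg==1.0.0
}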
- diags := bundle.Apply(context.Background(), b, ResolveVariableReferences("bundle", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "example/bar", b.Config.Workspace.RootPath) -} - -func TestResolveVariableReferencesForPrimitiveNonStringFields(t *testing.T) { - var diags diag.Diagnostics - - b := &bundle.Bundle{ - Config: config.Root{ - Variables: map[string]*variable.Variable{ - "no_alert_for_canceled_runs": {}, - "no_alert_for_skipped_runs": {}, - "min_workers": {}, - "max_workers": {}, - "spot_bid_max_price": {}, - }, - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - NotificationSettings: &jobs.JobNotificationSettings{ - NoAlertForCanceledRuns: false, - NoAlertForSkippedRuns: false, - }, - Tasks: []jobs.Task{ - { - NewCluster: &compute.ClusterSpec{ - Autoscale: &compute.AutoScale{ - MinWorkers: 0, - MaxWorkers: 0, - }, - AzureAttributes: &compute.AzureAttributes{ - SpotBidMaxPrice: 0.0, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Initialize the variables. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.InitializeVariables([]string{ - "no_alert_for_canceled_runs=true", - "no_alert_for_skipped_runs=true", - "min_workers=1", - "max_workers=2", - "spot_bid_max_price=0.5", - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Assign the variables to the dynamic configuration. - diags = bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - // Set the notification settings. - p = dyn.MustPathFromString("resources.jobs.job1.notification_settings") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_canceled_runs")), dyn.V("${var.no_alert_for_canceled_runs}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("no_alert_for_skipped_runs")), dyn.V("${var.no_alert_for_skipped_runs}")) - require.NoError(t, err) - - // Set the min and max workers. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.autoscale") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("min_workers")), dyn.V("${var.min_workers}")) - require.NoError(t, err) - v, err = dyn.SetByPath(v, p.Append(dyn.Key("max_workers")), dyn.V("${var.max_workers}")) - require.NoError(t, err) - - // Set the spot bid max price. - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0].new_cluster.azure_attributes") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("spot_bid_max_price")), dyn.V("${var.spot_bid_max_price}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - // Apply for the variable prefix. This should resolve the variables to their values. 
- diags = bundle.Apply(context.Background(), b, ResolveVariableReferences("variables")) - require.NoError(t, diags.Error()) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForCanceledRuns) - assert.True(t, b.Config.Resources.Jobs["job1"].JobSettings.NotificationSettings.NoAlertForSkippedRuns) - assert.Equal(t, 1, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MinWorkers) - assert.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.Autoscale.MaxWorkers) - assert.InDelta(t, 0.5, b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].NewCluster.AzureAttributes.SpotBidMaxPrice, 0.0001) -} - -func TestResolveComplexVariable(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, ResolveVariableReferences("bundle", "workspace", "variables")) - require.NoError(t, diags.Error()) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 2, b.Config.Resources.Jobs["job1"].JobSettings.JobClusters[0].NewCluster.NumWorkers) -} - -func TestResolveComplexVariableReferencesWithComplexVariablesError(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "cluster": { - Value: map[string]any{ - "node_type_id": "Standard_DS3_v2", - "num_workers": 2, - "spark_conf": "${var.spark_conf}", - }, - Type: variable.VariableTypeComplex, - }, - "spark_conf": { - Value: map[string]any{ - "spark.executor.memory": "4g", - "spark.executor.cores": "2", - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - JobClusters: []jobs.JobCluster{ - { - NewCluster: compute.ClusterSpec{ - NodeTypeId: "random", - }, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. 
- diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.job_clusters[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("new_cluster")), dyn.V("${var.cluster}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq(ResolveVariableReferencesInComplexVariables(), ResolveVariableReferences("bundle", "workspace", "variables"))) - require.ErrorContains(t, diags.Error(), "complex variables cannot contain references to another complex variables") -} - -func TestResolveComplexVariableWithVarReference(t *testing.T) { - b := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Name: "example", - }, - Variables: map[string]*variable.Variable{ - "package_version": { - Value: "1.0.0", - }, - "cluster_libraries": { - Value: [](map[string]any){ - { - "pypi": map[string]string{ - "package": "cicd_template==${var.package_version}", - }, - }, - }, - Type: variable.VariableTypeComplex, - }, - }, - - Resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": { - JobSettings: &jobs.JobSettings{ - Tasks: []jobs.Task{ - { - Libraries: []compute.Library{}, - }, - }, - }, - }, - }, - }, - }, - } - - ctx := context.Background() - - // Assign the variables to the dynamic configuration. - diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { - var p dyn.Path - var err error - - p = dyn.MustPathFromString("resources.jobs.job1.tasks[0]") - v, err = dyn.SetByPath(v, p.Append(dyn.Key("libraries")), dyn.V("${var.cluster_libraries}")) - require.NoError(t, err) - - return v, nil - }) - return diag.FromErr(err) - }) - require.NoError(t, diags.Error()) - - diags = bundle.Apply(ctx, b, bundle.Seq( - ResolveVariableReferencesInComplexVariables(), - ResolveVariableReferences("bundle", "workspace", "variables"), - )) - require.NoError(t, diags.Error()) - require.Equal(t, "cicd_template==1.0.0", b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[0].Pypi.Package) -} - func TestResolveVariableReferencesWithSourceLinkedDeployment(t *testing.T) { testCases := []struct { enabled bool diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index a2c830be3..1eda578fa 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -136,7 +136,7 @@ func (t *translateContext) rewritePath( } // Local path is relative to the directory the resource was defined in. - localPath := filepath.Join(dir, filepath.FromSlash(input)) + localPath := filepath.Join(dir, input) if interp, ok := t.seen[localPath]; ok { return interp, nil } @@ -151,6 +151,10 @@ func (t *translateContext) rewritePath( return "", fmt.Errorf("path %s is not contained in sync root path", localPath) } + // Normalize paths to separated by forward slashes. + localPath = filepath.ToSlash(localPath) + localRelPath = filepath.ToSlash(localRelPath) + // Convert local path into workspace path via specified function. 
var interp string switch opts.Mode { @@ -180,9 +184,9 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { - if filepath.Ext(localFullPath) != notebook.ExtensionNone { + if path.Ext(localFullPath) != notebook.ExtensionNone { return "", fmt.Errorf("notebook %s not found", literal) } @@ -198,7 +202,7 @@ func (t *translateContext) translateNotebookPath(ctx context.Context, literal, l // way we can provide a more targeted error message. for _, ext := range extensions { literalWithExt := literal + ext - localRelPathWithExt := filepath.ToSlash(localRelPath + ext) + localRelPathWithExt := localRelPath + ext if _, err := fs.Stat(t.b.SyncRoot, localRelPathWithExt); err == nil { return "", fmt.Errorf(`notebook %s not found. Did you mean %s? Local notebook references are expected to contain one of the following @@ -218,42 +222,42 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex } // Upon import, notebooks are stripped of their extension. - localRelPathNoExt := strings.TrimSuffix(localRelPath, filepath.Ext(localRelPath)) - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPathNoExt)), nil + localRelPathNoExt := strings.TrimSuffix(localRelPath, path.Ext(localRelPath)) + return path.Join(t.remoteRoot, localRelPathNoExt), nil } func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", filepath.FromSlash(localFullPath), err) } if nb { return "", ErrIsNotebook{localFullPath} } - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil + return path.Join(t.remoteRoot, localRelPath), nil } func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(localRelPath) if err != nil { return "", err } if !info.IsDir() { - return "", fmt.Errorf("%s is not a directory", localFullPath) + return "", fmt.Errorf("%s is not a directory", filepath.FromSlash(localFullPath)) } - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil + return path.Join(t.remoteRoot, localRelPath), nil } func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a file: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a file: %w", filepath.FromSlash(localFullPath), err) } if info.IsDir() { return "", fmt.Errorf("expected %s to be a file but 
found a directory", literal) @@ -262,12 +266,12 @@ func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, l } func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) { - info, err := os.Stat(localFullPath) + info, err := os.Stat(filepath.FromSlash(localFullPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("directory %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a directory: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a directory: %w", filepath.FromSlash(localFullPath), err) } if !info.IsDir() { return "", fmt.Errorf("expected %s to be a directory but found a file", literal) @@ -281,7 +285,7 @@ func (t *translateContext) translateLocalRelativePath(ctx context.Context, liter func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { - localRelPath = "." + string(filepath.Separator) + localRelPath + localRelPath = "./" + localRelPath } return localRelPath, nil } diff --git a/bundle/config/mutator/translate_paths_artifacts_test.go b/bundle/config/mutator/translate_paths_artifacts_test.go index fb402b488..0d1af6156 100644 --- a/bundle/config/mutator/translate_paths_artifacts_test.go +++ b/bundle/config/mutator/translate_paths_artifacts_test.go @@ -46,7 +46,7 @@ func TestTranslatePathsArtifacts_InsideSyncRoot(t *testing.T) { require.NoError(t, diags.Error()) // Assert that the artifact path has been converted to a local absolute path. - assert.Equal(t, lib, b.Config.Artifacts["my_artifact"].Path) + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) } func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { @@ -79,5 +79,5 @@ func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { require.NoError(t, diags.Error()) // Assert that the artifact path has been converted to a local absolute path. - assert.Equal(t, lib, b.Config.Artifacts["my_artifact"].Path) + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) } diff --git a/bundle/config/mutator/translate_paths_dashboards_test.go b/bundle/config/mutator/translate_paths_dashboards_test.go index 5e4e69f5d..02fba92e0 100644 --- a/bundle/config/mutator/translate_paths_dashboards_test.go +++ b/bundle/config/mutator/translate_paths_dashboards_test.go @@ -48,7 +48,7 @@ func TestTranslatePathsDashboards_FilePathRelativeSubDirectory(t *testing.T) { // Assert that the file path for the dashboard has been converted to its local absolute path. 
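The net effect of the translate_paths changes is a convention: local paths are normalized to forward slashes right after the sync-root check, remote paths are always built with path.Join, and filepath.FromSlash is applied only when a path is rendered in a user-facing error message. A small standalone sketch of that convention using only the standard library (the workspace prefix is an assumption):

package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	// Join using the OS separator, then normalize to forward slashes for
	// internal bookkeeping and for building remote (workspace) paths.
	local := filepath.ToSlash(filepath.Join("resources", "src", "notebook.py"))
	remote := path.Join("/Workspace/Users/someone@example.com/files", local)
	fmt.Println(remote)

	// Convert back to the native separator only when showing the path to a
	// user, e.g. in an error message (backslashes on Windows, unchanged elsewhere).
	fmt.Printf("file %s not found\n", filepath.FromSlash(local))
}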
assert.Equal( t, - filepath.Join(dir, "src", "my_dashboard.lvdash.json"), + filepath.ToSlash(filepath.Join(dir, "src", "my_dashboard.lvdash.json")), b.Config.Resources.Dashboards["dashboard"].FilePath, ) } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 493abb8c5..aa6488ab0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "github.com/databricks/cli/bundle" @@ -226,7 +225,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -251,7 +250,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, ) assert.Equal( @@ -362,7 +361,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) assert.Equal( t, - filepath.Join("job", "dist", "task.jar"), + "job/dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar, ) assert.Equal( @@ -774,8 +773,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) - assert.Equal(t, strings.Join([]string{".", "job", "dist", "env1.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) - assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) + assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) + assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) @@ -839,7 +838,7 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { assert.Equal( t, - filepath.Join("variables", "local", "whl.whl"), + "variables/local/whl.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } @@ -952,34 +951,34 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { // updated to source path assert.Equal( t, - filepath.Join(dir, "my_job_notebook"), + dir+"/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, ) assert.Equal( t, - filepath.Join(dir, "requirements.txt"), + dir+"/requirements.txt", b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, ) assert.Equal( t, - filepath.Join(dir, "my_pipeline_notebook"), + dir+"/my_pipeline_notebook", b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", 
b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, ) // left as is assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -989,7 +988,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, ) assert.Equal( diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index f5787a81d..04acd28ab 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -47,15 +47,13 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di var errs errgroup.Group var diags diag.Diagnostics - for i, pattern := range patterns { - index := i - fullPattern := pattern + for index, pattern := range patterns { // If the pattern is negated, strip the negation prefix // and check if the pattern matches any files. // Negation in gitignore syntax means "don't look at this path' // So if p matches nothing it's useless negation, but if there are matches, // it means: do not include these files into result set - p := strings.TrimPrefix(fullPattern, "!") + p := strings.TrimPrefix(pattern, "!") errs.Go(func() error { fs, err := fileset.NewGlobSet(rb.BundleRoot(), []string{p}) if err != nil { @@ -72,7 +70,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di mu.Lock() diags = diags.Append(diag.Diagnostic{ Severity: diag.Warning, - Summary: fmt.Sprintf("Pattern %s does not match any files", fullPattern), + Summary: fmt.Sprintf("Pattern %s does not match any files", pattern), Locations: []dyn.Location{loc.Location()}, Paths: []dyn.Path{loc.Path()}, }) diff --git a/bundle/internal/schema/annotations_openapi.yml b/bundle/internal/schema/annotations_openapi.yml index e9c893c87..8ff5c9253 100644 --- a/bundle/internal/schema/annotations_openapi.yml +++ b/bundle/internal/schema/annotations_openapi.yml @@ -1,4 +1,47 @@ # This file is auto-generated. DO NOT EDIT. +github.com/databricks/cli/bundle/config/resources.App: + "active_deployment": + "description": |- + The active deployment of the app. A deployment is considered active when it has been deployed + to the app compute. + "app_status": {} + "compute_status": {} + "create_time": + "description": |- + The creation time of the app. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user that created the app. + "default_source_code_path": + "description": |- + The default workspace file system path of the source code from which app deployment are + created. This field tracks the workspace source code path of the last active deployment. + "description": + "description": |- + The description of the app. + "name": + "description": |- + The name of the app. The name must contain only lowercase alphanumeric characters and hyphens. + It must be unique within the workspace. + "pending_deployment": + "description": |- + The pending deployment of the app. A deployment is considered pending when it is being prepared + for deployment to the app compute. + "resources": + "description": |- + Resources for the app. + "service_principal_client_id": {} + "service_principal_id": {} + "service_principal_name": {} + "update_time": + "description": |- + The update time of the app. Formatted timestamp in ISO 6801. 
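The cleanup in checkPatterns drops the index/fullPattern aliasing copies because, since Go 1.22, each loop iteration gets fresh range variables, so closures launched from the loop body can capture them directly; the copyloopvar linter now flags such redundant copies. A minimal sketch of the pattern, with a made-up pattern list:

package main

import (
	"fmt"
	"sync"
)

func main() {
	patterns := []string{"src/**", "!dist/**", "notebooks/*.py"}

	var wg sync.WaitGroup
	for index, pattern := range patterns {
		wg.Add(1)
		// With Go 1.22 per-iteration loop variables, the goroutine can capture
		// index and pattern directly; the old "index := i; p := pattern"
		// copies are no longer needed.
		go func() {
			defer wg.Done()
			fmt.Printf("pattern %d: %s\n", index, pattern)
		}()
	}
	wg.Wait()
}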
+ "updater": + "description": |- + The email of the user that last updated the app. + "url": + "description": |- + The URL of the app once it is deployed. github.com/databricks/cli/bundle/config/resources.Cluster: "apply_policy_default_values": "description": |- @@ -220,6 +263,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "job_clusters": "description": |- A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + If more than 100 job clusters are available, you can paginate through them using :method:jobs/get. "max_concurrent_runs": "description": |- An optional maximum allowed number of concurrent runs of the job. @@ -250,6 +294,7 @@ github.com/databricks/cli/bundle/config/resources.Job: "tasks": "description": |- A list of task specifications to be executed by this job. + If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available. "timeout_seconds": "description": |- An optional timeout applied to each run of this job. A value of `0` means no timeout. @@ -489,6 +534,187 @@ github.com/databricks/cli/bundle/config/resources.Volume: "description": |- The storage location on the cloud "volume_type": {} +github.com/databricks/databricks-sdk-go/service/apps.AppDeployment: + "create_time": + "description": |- + The creation time of the deployment. Formatted timestamp in ISO 6801. + "creator": + "description": |- + The email of the user creates the deployment. + "deployment_artifacts": + "description": |- + The deployment artifacts for an app. + "deployment_id": + "description": |- + The unique id of the deployment. + "mode": + "description": |- + The mode of which the deployment will manage the source code. + "source_code_path": + "description": |- + The workspace file system path of the source code used to create the app deployment. This is different from + `deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers + to the original source code location of the app in the workspace during deployment creation, whereas + the latter provides a system generated stable snapshotted source code path used by the deployment. + "status": + "description": |- + Status and status message of the deployment + "update_time": + "description": |- + The update time of the deployment. Formatted timestamp in ISO 6801. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentArtifacts: + "source_code_path": + "description": |- + The snapshotted workspace file system path of the source code loaded by the deployed app. +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentMode: + "_": + "enum": + - |- + SNAPSHOT + - |- + AUTO_SYNC +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentState: + "_": + "enum": + - |- + SUCCEEDED + - |- + FAILED + - |- + IN_PROGRESS + - |- + CANCELLED +github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: + "message": + "description": |- + Message corresponding with the deployment state. + "state": + "description": |- + State of the deployment. +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "description": + "description": |- + Description of the App Resource. + "job": {} + "name": + "description": |- + Name of the App Resource. 
+ "secret": {} + "serving_endpoint": {} + "sql_warehouse": {} +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: + "id": + "description": |- + Id of the job to grant permission on. + "permission": + "description": |- + Permissions to grant on the Job. Supported permissions are: "CAN_MANAGE", "IS_OWNER", "CAN_MANAGE_RUN", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceJobJobPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + IS_OWNER + - |- + CAN_MANAGE_RUN + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecret: + "key": + "description": |- + Key of the secret to grant permission on. + "permission": + "description": |- + Permission to grant on the secret scope. For secrets, only one permission is allowed. Permission must be one of: "READ", "WRITE", "MANAGE". + "scope": + "description": |- + Scope of the secret to grant permission on. +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSecretSecretPermission: + "_": + "description": |- + Permission to grant on the secret scope. Supported permissions are: "READ", "WRITE", "MANAGE". + "enum": + - |- + READ + - |- + WRITE + - |- + MANAGE +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpoint: + "name": + "description": |- + Name of the serving endpoint to grant permission on. + "permission": + "description": |- + Permission to grant on the serving endpoint. Supported permissions are: "CAN_MANAGE", "CAN_QUERY", "CAN_VIEW". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceServingEndpointServingEndpointPermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_QUERY + - |- + CAN_VIEW +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouse: + "id": + "description": |- + Id of the SQL warehouse to grant permission on. + "permission": + "description": |- + Permission to grant on the SQL warehouse. Supported permissions are: "CAN_MANAGE", "CAN_USE", "IS_OWNER". +github.com/databricks/databricks-sdk-go/service/apps.AppResourceSqlWarehouseSqlWarehousePermission: + "_": + "enum": + - |- + CAN_MANAGE + - |- + CAN_USE + - |- + IS_OWNER +github.com/databricks/databricks-sdk-go/service/apps.ApplicationState: + "_": + "enum": + - |- + DEPLOYING + - |- + RUNNING + - |- + CRASHED + - |- + UNAVAILABLE +github.com/databricks/databricks-sdk-go/service/apps.ApplicationStatus: + "message": + "description": |- + Application status message + "state": + "description": |- + State of the application. +github.com/databricks/databricks-sdk-go/service/apps.ComputeState: + "_": + "enum": + - |- + ERROR + - |- + DELETING + - |- + STARTING + - |- + STOPPING + - |- + UPDATING + - |- + STOPPED + - |- + ACTIVE +github.com/databricks/databricks-sdk-go/service/apps.ComputeStatus: + "message": + "description": |- + Compute status message + "state": + "description": |- + State of the app compute. github.com/databricks/databricks-sdk-go/service/catalog.MonitorCronSchedule: "pause_status": "description": |- @@ -2116,6 +2342,26 @@ github.com/databricks/databricks-sdk-go/service/ml.ModelVersionTag: github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger: "quartz_cron_schedule": {} "timezone_id": {} +github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek: + "_": + "description": |- + Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). + If not specified all days of the week will be used. 
+ "enum": + - |- + MONDAY + - |- + TUESDAY + - |- + WEDNESDAY + - |- + THURSDAY + - |- + FRIDAY + - |- + SATURDAY + - |- + SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.DeploymentKind: "_": "description": | @@ -2375,26 +2621,6 @@ github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindow: "description": |- Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details. If not specified, UTC will be used. -github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek: - "_": - "description": |- - Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour). - If not specified all days of the week will be used. - "enum": - - |- - MONDAY - - |- - TUESDAY - - |- - WEDNESDAY - - |- - THURSDAY - - |- - FRIDAY - - |- - SATURDAY - - |- - SUNDAY github.com/databricks/databricks-sdk-go/service/pipelines.SchemaSpec: "destination_catalog": "description": |- diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index d92c2c0a4..58140914c 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -376,6 +376,19 @@ github.com/databricks/cli/bundle/config/resources.Volume: "volume_type": "description": |- PLACEHOLDER +github.com/databricks/databricks-sdk-go/service/apps.AppResource: + "job": + "description": |- + PLACEHOLDER + "secret": + "description": |- + PLACEHOLDER + "serving_endpoint": + "description": |- + PLACEHOLDER + "sql_warehouse": + "description": |- + PLACEHOLDER github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes: "availability": "description": |- @@ -496,25 +509,6 @@ github.com/databricks/databricks-sdk-go/service/apps.AppDeploymentStatus: "state": "description": |- PLACEHOLDER -github.com/databricks/databricks-sdk-go/service/apps.AppResource: - "description": - "description": |- - PLACEHOLDER - "job": - "description": |- - PLACEHOLDER - "name": - "description": |- - PLACEHOLDER - "secret": - "description": |- - PLACEHOLDER - "serving_endpoint": - "description": |- - PLACEHOLDER - "sql_warehouse": - "description": |- - PLACEHOLDER github.com/databricks/databricks-sdk-go/service/apps.AppResourceJob: "id": "description": |- diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 27c4b16cd..677b8fc10 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" diff --git a/bundle/internal/tf/schema/resource_external_location.go b/bundle/internal/tf/schema/resource_external_location.go index da28271bc..72411f4dc 100644 --- a/bundle/internal/tf/schema/resource_external_location.go +++ b/bundle/internal/tf/schema/resource_external_location.go @@ -13,8 +13,13 @@ type ResourceExternalLocationEncryptionDetails struct { type ResourceExternalLocation struct { AccessPoint string `json:"access_point,omitempty"` + BrowseOnly bool `json:"browse_only,omitempty"` Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name"` + Fallback bool `json:"fallback,omitempty"` ForceDestroy bool 
`json:"force_destroy,omitempty"` ForceUpdate bool `json:"force_update,omitempty"` Id string `json:"id,omitempty"` @@ -24,6 +29,8 @@ type ResourceExternalLocation struct { Owner string `json:"owner,omitempty"` ReadOnly bool `json:"read_only,omitempty"` SkipValidation bool `json:"skip_validation,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` Url string `json:"url"` EncryptionDetails *ResourceExternalLocationEncryptionDetails `json:"encryption_details,omitempty"` } diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 1f89dc64d..7dd3f9210 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" func NewRoot() *Root { return &Root{ diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index c6ec04962..b59ce9f89 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -130,6 +130,7 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( bundle.Seq( + apps.SlowDeployMessage(), bundle.LogString("Deploying resources..."), terraform.Apply(), ), diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 50df5634a..c5b875196 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -61,7 +61,6 @@ func Initialize() bundle.Mutator { pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseApplyMutators), mutator.ResolveVariableReferencesInLookup(), mutator.ResolveResourceReferences(), - mutator.ResolveVariableReferencesInComplexVariables(), mutator.ResolveVariableReferences( "bundle", "workspace", @@ -74,6 +73,8 @@ func Initialize() bundle.Mutator { mutator.MergePipelineClusters(), mutator.MergeApps(), + mutator.CaptureSchemaDependency(), + // Provide permission config errors & warnings after initializing all variables permissions.PermissionDiagnostics(), mutator.SetRunAs(), diff --git a/bundle/run/app.go b/bundle/run/app.go index 11030beda..b15f3f4b6 100644 --- a/bundle/run/app.go +++ b/bundle/run/app.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/run/output" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/apps" "github.com/spf13/cobra" ) @@ -111,11 +112,21 @@ func (a *appRunner) start(ctx context.Context) error { // active and pending deployments fields (if any). If there are active or pending deployments, // we need to wait for them to complete before we can do the new deployment. // Otherwise, the new deployment will fail. - // Thus, we first wait for the active deployment to complete. - if startedApp.ActiveDeployment != nil && - startedApp.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { + err = waitForDeploymentToComplete(ctx, w, startedApp) + if err != nil { + return err + } + + logProgress(ctx, "App is started!") + return nil +} + +func waitForDeploymentToComplete(ctx context.Context, w *databricks.WorkspaceClient, app *apps.App) error { + // We first wait for the active deployment to complete. 
+ if app.ActiveDeployment != nil && + app.ActiveDeployment.Status.State == apps.AppDeploymentStateInProgress { logProgress(ctx, "Waiting for the active deployment to complete...") - _, err = w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, startedApp.ActiveDeployment.DeploymentId, 20*time.Minute, nil) + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.ActiveDeployment.DeploymentId, 20*time.Minute, nil) if err != nil { return err } @@ -123,17 +134,16 @@ func (a *appRunner) start(ctx context.Context) error { } // Then, we wait for the pending deployment to complete. - if startedApp.PendingDeployment != nil && - startedApp.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { + if app.PendingDeployment != nil && + app.PendingDeployment.Status.State == apps.AppDeploymentStateInProgress { logProgress(ctx, "Waiting for the pending deployment to complete...") - _, err = w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, startedApp.PendingDeployment.DeploymentId, 20*time.Minute, nil) + _, err := w.Apps.WaitGetDeploymentAppSucceeded(ctx, app.Name, app.PendingDeployment.DeploymentId, 20*time.Minute, nil) if err != nil { return err } logProgress(ctx, "Pending deployment is completed!") } - logProgress(ctx, "App is started!") return nil } @@ -142,16 +152,38 @@ func (a *appRunner) deploy(ctx context.Context) error { b := a.bundle w := b.WorkspaceClient() + sourceCodePath := app.SourceCodePath wait, err := w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ AppName: app.Name, AppDeployment: &apps.AppDeployment{ Mode: apps.AppDeploymentModeSnapshot, - SourceCodePath: app.SourceCodePath, + SourceCodePath: sourceCodePath, }, }) // If deploy returns an error, then there's an active deployment in progress, wait for it to complete. + // For this we first need to get an app and its acrive and pending deployments and then wait for them. 
if err != nil { - return err + app, err := w.Apps.Get(ctx, apps.GetAppRequest{Name: app.Name}) + if err != nil { + return fmt.Errorf("failed to get app %s: %w", app.Name, err) + } + + err = waitForDeploymentToComplete(ctx, w, app) + if err != nil { + return err + } + + // Now we can try to deploy the app again + wait, err = w.Apps.Deploy(ctx, apps.CreateAppDeploymentRequest{ + AppName: app.Name, + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: sourceCodePath, + }, + }) + if err != nil { + return err + } } _, err = wait.OnProgress(func(ad *apps.AppDeployment) { diff --git a/bundle/run/app_test.go b/bundle/run/app_test.go index 44ff698e5..8e82f45ae 100644 --- a/bundle/run/app_test.go +++ b/bundle/run/app_test.go @@ -3,6 +3,7 @@ package run import ( "bytes" "context" + "errors" "os" "path/filepath" "testing" @@ -189,6 +190,69 @@ func TestAppRunWithAnActiveDeploymentInProgress(t *testing.T) { r.run(t) } +func TestAppDeployWithDeploymentInProgress(t *testing.T) { + ctx, b, mwc := setupBundle(t) + + appApi := mwc.GetMockAppsAPI() + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + AppStatus: &apps.ApplicationStatus{ + State: apps.ApplicationStateRunning, + }, + ComputeStatus: &apps.ComputeStatus{ + State: apps.ComputeStateActive, + }, + }, nil).Once() + + wait := &apps.WaitGetDeploymentAppSucceeded[apps.AppDeployment]{ + Poll: func(_ time.Duration, _ func(*apps.AppDeployment)) (*apps.AppDeployment, error) { + return nil, nil + }, + } + + // First deployment fails + appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(nil, errors.New("deployment in progress")).Once() + + // After first deployment fails, we should get the app and wait for the deployment to complete + appApi.EXPECT().Get(mock.Anything, apps.GetAppRequest{ + Name: "my_app", + }).Return(&apps.App{ + Name: "my_app", + ActiveDeployment: &apps.AppDeployment{ + DeploymentId: "active_deployment_id", + Status: &apps.AppDeploymentStatus{ + State: apps.AppDeploymentStateInProgress, + }, + }, + }, nil).Once() + + appApi.EXPECT().WaitGetDeploymentAppSucceeded(mock.Anything, "my_app", "active_deployment_id", mock.Anything, mock.Anything).Return(nil, nil) + + // Second one should succeeed + appApi.EXPECT().Deploy(mock.Anything, apps.CreateAppDeploymentRequest{ + AppName: "my_app", + AppDeployment: &apps.AppDeployment{ + Mode: apps.AppDeploymentModeSnapshot, + SourceCodePath: "/Workspace/Users/foo@bar.com/files/my_app", + }, + }).Return(wait, nil).Once() + + r := &testAppRunner{ + m: mwc, + b: b, + ctx: ctx, + } + r.run(t) +} + func TestStopApp(t *testing.T) { ctx, b, mwc := setupBundle(t) appsApi := mwc.GetMockAppsAPI() diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index bf2afa9ad..a27913647 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -390,7 +390,7 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules" }, "job_clusters": { - "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. 
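The deploy-side retry follows a simple shape: attempt the deployment; if it fails because an earlier deployment is still in flight, fetch the app, wait for the active or pending deployment to complete, and retry exactly once. A standalone sketch of that retry-once-after-waiting pattern, with all names hypothetical and the wait simulated:

package main

import (
	"errors"
	"fmt"
)

var errInProgress = errors.New("deployment in progress")

// fakeApps stands in for the Apps API: the first deploy fails because an
// earlier deployment is still running; after waiting, deploys succeed.
type fakeApps struct{ busy bool }

func (a *fakeApps) Deploy() error {
	if a.busy {
		return errInProgress
	}
	return nil
}

func (a *fakeApps) WaitForActiveDeployment() {
	fmt.Println("waiting for the active deployment to complete...")
	a.busy = false
}

// deploy retries exactly once: wait for the in-flight deployment, then redeploy.
func deploy(a *fakeApps) error {
	if err := a.Deploy(); err != nil {
		a.WaitForActiveDeployment()
		return a.Deploy()
	}
	return nil
}

func main() {
	a := &fakeApps{busy: true}
	if err := deploy(a); err != nil {
		fmt.Println("deploy failed:", err)
		return
	}
	fmt.Println("deploy succeeded")
}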
Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.\nIf more than 100 job clusters are available, you can paginate through them using :method:jobs/get.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.JobCluster" }, "max_concurrent_runs": { @@ -428,7 +428,7 @@ "$ref": "#/$defs/map/string" }, "tasks": { - "description": "A list of task specifications to be executed by this job.", + "description": "A list of task specifications to be executed by this job.\nIf more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available.", "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/jobs.Task" }, "timeout_seconds": { @@ -1667,10 +1667,20 @@ ] }, "apps.AppDeploymentMode": { - "type": "string" + "type": "string", + "enum": [ + "SNAPSHOT", + "AUTO_SYNC" + ] }, "apps.AppDeploymentState": { - "type": "string" + "type": "string", + "enum": [ + "SUCCEEDED", + "FAILED", + "IN_PROGRESS", + "CANCELLED" + ] }, "apps.AppDeploymentStatus": { "oneOf": [ @@ -1752,7 +1762,13 @@ ] }, "apps.AppResourceJobJobPermission": { - "type": "string" + "type": "string", + "enum": [ + "CAN_MANAGE", + "IS_OWNER", + "CAN_MANAGE_RUN", + "CAN_VIEW" + ] }, "apps.AppResourceSecret": { "oneOf": [ @@ -1783,7 +1799,13 @@ ] }, "apps.AppResourceSecretSecretPermission": { - "type": "string" + "type": "string", + "description": "Permission to grant on the secret scope. Supported permissions are: \"READ\", \"WRITE\", \"MANAGE\".", + "enum": [ + "READ", + "WRITE", + "MANAGE" + ] }, "apps.AppResourceServingEndpoint": { "oneOf": [ @@ -1810,7 +1832,12 @@ ] }, "apps.AppResourceServingEndpointServingEndpointPermission": { - "type": "string" + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_QUERY", + "CAN_VIEW" + ] }, "apps.AppResourceSqlWarehouse": { "oneOf": [ @@ -1837,10 +1864,21 @@ ] }, "apps.AppResourceSqlWarehouseSqlWarehousePermission": { - "type": "string" + "type": "string", + "enum": [ + "CAN_MANAGE", + "CAN_USE", + "IS_OWNER" + ] }, "apps.ApplicationState": { - "type": "string" + "type": "string", + "enum": [ + "DEPLOYING", + "RUNNING", + "CRASHED", + "UNAVAILABLE" + ] }, "apps.ApplicationStatus": { "oneOf": [ @@ -1863,7 +1901,16 @@ ] }, "apps.ComputeState": { - "type": "string" + "type": "string", + "enum": [ + "ERROR", + "DELETING", + "STARTING", + "STOPPING", + "UPDATING", + "STOPPED", + "ACTIVE" + ] }, "apps.ComputeStatus": { "oneOf": [ @@ -4574,6 +4621,19 @@ } ] }, + "pipelines.DayOfWeek": { + "type": "string", + "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", + "enum": [ + "MONDAY", + "TUESDAY", + "WEDNESDAY", + "THURSDAY", + "FRIDAY", + "SATURDAY", + "SUNDAY" + ] + }, "pipelines.DeploymentKind": { "type": "string", "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.\n", @@ -5008,7 +5068,7 @@ "properties": { "days_of_week": { "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" + "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" }, 
"start_hour": { "description": "An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day.\nContinuous pipeline restart is triggered only within a five-hour window starting at this hour.", @@ -5030,19 +5090,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "type": "string", - "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", - "enum": [ - "MONDAY", - "TUESDAY", - "WEDNESDAY", - "THURSDAY", - "FRIDAY", - "SATURDAY", - "SUNDAY" - ] - }, "pipelines.SchemaSpec": { "oneOf": [ { @@ -6624,6 +6671,20 @@ } ] }, + "pipelines.DayOfWeek": { + "oneOf": [ + { + "type": "array", + "items": { + "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.DayOfWeek" + } + }, + { + "type": "string", + "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" + } + ] + }, "pipelines.IngestionConfig": { "oneOf": [ { @@ -6680,20 +6741,6 @@ } ] }, - "pipelines.RestartWindowDaysOfWeek": { - "oneOf": [ - { - "type": "array", - "items": { - "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.RestartWindowDaysOfWeek" - } - }, - { - "type": "string", - "pattern": "\\$\\{(var(\\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\\[[0-9]+\\])*)+)\\}" - } - ] - }, "serving.AiGatewayRateLimit": { "oneOf": [ { diff --git a/bundle/tests/path_translation_test.go b/bundle/tests/path_translation_test.go deleted file mode 100644 index 05702d2a2..000000000 --- a/bundle/tests/path_translation_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPathTranslationFallback(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - require.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 6) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", 
filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationFallbackError(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} - -func TestPathTranslationNominal(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 8) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - assert.Equal(t, "for_each_notebook_example", filepath.ToSlash(j.Tasks[6].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[6].ForEachTask.Task.NotebookTask.NotebookPath)) - - assert.Equal(t, "for_each_spark_python_example", filepath.ToSlash(j.Tasks[7].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[7].ForEachTask.Task.SparkPythonTask.PythonFile)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationNominalError(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} diff --git a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got 
arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go deleted file mode 100644 index 0f553ac3d..000000000 --- a/bundle/tests/relative_path_translation_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestRelativePathTranslationDefault(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "default") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile) -} - 
-func TestRelativePathTranslationOverride(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "override") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile) -} diff --git a/bundle/tests/relative_path_with_includes_test.go b/bundle/tests/relative_path_with_includes_test.go index 8efac0039..7249cac1f 100644 --- a/bundle/tests/relative_path_with_includes_test.go +++ b/bundle/tests/relative_path_with_includes_test.go @@ -17,8 +17,8 @@ func TestRelativePathsWithIncludes(t *testing.T) { diags := bundle.Apply(context.Background(), b, m) assert.NoError(t, diags.Error()) - assert.Equal(t, filepath.Join(b.SyncRootPath, "artifact_a"), b.Config.Artifacts["test_a"].Path) - assert.Equal(t, filepath.Join(b.SyncRootPath, "subfolder", "artifact_b"), b.Config.Artifacts["test_b"].Path) + assert.Equal(t, b.SyncRootPath+"/artifact_a", b.Config.Artifacts["test_a"].Path) + assert.Equal(t, b.SyncRootPath+"/subfolder/artifact_b", b.Config.Artifacts["test_b"].Path) assert.ElementsMatch( t, @@ -37,6 +37,6 @@ func TestRelativePathsWithIncludes(t *testing.T) { b.Config.Sync.Exclude, ) - assert.Equal(t, filepath.Join("dist", "job_a.whl"), b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) - assert.Equal(t, filepath.Join("subfolder", "dist", "job_b.whl"), b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "dist/job_a.whl", b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "subfolder/dist/job_b.whl", b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) } diff --git a/cmd/account/federation-policy/federation-policy.go b/cmd/account/federation-policy/federation-policy.go index d78ac709a..e47bf8324 100755 --- a/cmd/account/federation-policy/federation-policy.go +++ b/cmd/account/federation-policy/federation-policy.go @@ -110,8 +110,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create" @@ -180,7 +181,10 @@ func newDelete() *cobra.Command { cmd.Use = "delete POLICY_ID" cmd.Short = `Delete account federation policy.` - cmd.Long = `Delete account federation policy.` + cmd.Long = `Delete account federation policy. + + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -233,7 +237,10 @@ func newGet() *cobra.Command { cmd.Use = "get POLICY_ID" cmd.Short = `Get account federation policy.` - cmd.Long = `Get account federation policy.` + cmd.Long = `Get account federation policy. 
+ + Arguments: + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -338,25 +345,22 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update POLICY_ID UPDATE_MASK" + cmd.Use = "update POLICY_ID" cmd.Short = `Update account federation policy.` cmd.Long = `Update account federation policy. Arguments: - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(2) + check := root.ExactArgs(1) return check(cmd, args) } @@ -378,7 +382,6 @@ func newUpdate() *cobra.Command { } } updateReq.PolicyId = args[0] - updateReq.UpdateMask = args[1] response, err := a.FederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go index 77f73bcd0..df36de239 100755 --- a/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go +++ b/cmd/account/service-principal-federation-policy/service-principal-federation-policy.go @@ -117,8 +117,9 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&createReq.PolicyId, "policy-id", createReq.PolicyId, `The identifier for the federation policy.`) cmd.Flags().StringVar(&createReq.Policy.Description, "description", createReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&createReq.Policy.Name, "name", createReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy cmd.Use = "create SERVICE_PRINCIPAL_ID" @@ -198,7 +199,7 @@ func newDelete() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -259,7 +260,7 @@ func newGet() *cobra.Command { Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. 
- POLICY_ID: ` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) @@ -376,26 +377,23 @@ func newUpdate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&updateJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().StringVar(&updateReq.UpdateMask, "update-mask", updateReq.UpdateMask, `The field mask specifies which fields of the policy to update.`) cmd.Flags().StringVar(&updateReq.Policy.Description, "description", updateReq.Policy.Description, `Description of the federation policy.`) - cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Name of the federation policy.`) + cmd.Flags().StringVar(&updateReq.Policy.Name, "name", updateReq.Policy.Name, `Resource name for the federation policy.`) // TODO: complex arg: oidc_policy - cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID UPDATE_MASK" + cmd.Use = "update SERVICE_PRINCIPAL_ID POLICY_ID" cmd.Short = `Update service principal federation policy.` cmd.Long = `Update service principal federation policy. Arguments: SERVICE_PRINCIPAL_ID: The service principal id for the federation policy. - POLICY_ID: - UPDATE_MASK: Field mask is required to be passed into the PATCH request. Field mask - specifies which fields of the setting payload will be updated. The field - mask needs to be supplied as single string. To specify multiple fields in - the field mask, use comma as the separator (no space).` + POLICY_ID: The identifier for the federation policy.` cmd.Annotations = make(map[string]string) cmd.Args = func(cmd *cobra.Command, args []string) error { - check := root.ExactArgs(3) + check := root.ExactArgs(2) return check(cmd, args) } @@ -421,7 +419,6 @@ func newUpdate() *cobra.Command { return fmt.Errorf("invalid SERVICE_PRINCIPAL_ID: %s", args[0]) } updateReq.PolicyId = args[1] - updateReq.UpdateMask = args[2] response, err := a.ServicePrincipalFederationPolicy.Update(ctx, updateReq) if err != nil { diff --git a/cmd/bundle/generate/app.go b/cmd/bundle/generate/app.go index 819b62b38..9dbd4fe46 100644 --- a/cmd/bundle/generate/app.go +++ b/cmd/bundle/generate/app.go @@ -36,8 +36,8 @@ func NewGenerateAppCommand() *cobra.Command { cmd.Flags().StringVar(&appName, "existing-app-name", "", `App name to generate config for`) cmd.MarkFlagRequired("existing-app-name") - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join("resources"), `Directory path where the output bundle config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join("src", "app"), `Directory path where the app files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Directory path where the output bundle config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src/app", `Directory path where the app files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index fa3c91b2a..d56d246c2 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -441,8 +441,8 @@ func NewGenerateDashboardCommand() *cobra.Command { cmd.Flags().MarkHidden("existing-dashboard-id") // Output flags. 
- cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "./resources", `directory to write the configuration to`) - cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "./src", `directory to write the dashboard representation to`) + cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "resources", `directory to write the configuration to`) + cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "src", `directory to write the dashboard representation to`) cmd.Flags().BoolVarP(&d.force, "force", "f", false, `force overwrite existing files in the output directory`) // Exactly one of the lookup flags must be provided. diff --git a/cmd/bundle/generate/job.go b/cmd/bundle/generate/job.go index 827d270e5..d97891cd5 100644 --- a/cmd/bundle/generate/job.go +++ b/cmd/bundle/generate/job.go @@ -32,13 +32,8 @@ func NewGenerateJobCommand() *cobra.Command { cmd.Flags().Int64Var(&jobId, "existing-job-id", 0, `Job ID of the job to generate config for`) cmd.MarkFlagRequired("existing-job-id") - wd, err := os.Getwd() - if err != nil { - wd = "." - } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 863b0b2f7..1d2c345d6 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -32,13 +32,8 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd.Flags().StringVar(&pipelineId, "existing-pipeline-id", "", `ID of the pipeline to generate config for`) cmd.MarkFlagRequired("existing-pipeline-id") - wd, err := os.Getwd() - if err != nil { - wd = "." 
- } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/utils.go b/cmd/bundle/generate/utils.go index cbea0bfcc..c2c9bbb55 100644 --- a/cmd/bundle/generate/utils.go +++ b/cmd/bundle/generate/utils.go @@ -138,9 +138,7 @@ func (n *downloader) FlushToDisk(ctx context.Context, force bool) error { } errs, errCtx := errgroup.WithContext(ctx) - for k, v := range n.files { - targetPath := k - filePath := v + for targetPath, filePath := range n.files { errs.Go(func() error { reader, err := n.w.Workspace.Download(errCtx, filePath) if err != nil { diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..6b93fd1e5 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -59,6 +59,11 @@ var nativeTemplates = []nativeTemplate{ hidden: true, description: "The default PyDABs template", }, + { + name: "experimental-jobs-as-code", + hidden: true, + description: "Jobs as code template (experimental)", + }, { name: customTemplate, description: "Bring your own template", diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py index 6873257d5..a162da342 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -1 +1 @@ -print(f'setting up important infrastructure') +print(f"setting up important infrastructure") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py index 769ee73ee..e5866d6ae 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -2,26 +2,34 @@ import os, sys, json payload = json.loads(sys.argv[1]) -if 'echo' == payload['command']: - json.dump({ - 'command': payload['command'], - 'flags': payload['flags'], - 'env': {k:v for k,v in os.environ.items()} - }, sys.stdout) +if "echo" == payload["command"]: + json.dump( + { + "command": payload["command"], + "flags": payload["flags"], + "env": {k: v for k, v in os.environ.items()}, + }, + sys.stdout, + ) sys.exit(0) -if 'table' == payload['command']: +if "table" == payload["command"]: sys.stderr.write("some intermediate info\n") - json.dump({'records': [ - {'key': 'First', 'value': 'Second'}, - {'key': 'Third', 'value': 'Fourth'}, - ]}, sys.stdout) + json.dump( + { + "records": [ + {"key": "First", "value": "Second"}, + {"key": "Third", "value": "Fourth"}, + ] + }, + sys.stdout, + ) sys.exit(0) -print(f'host is {os.environ["DATABRICKS_HOST"]}') +print(f"host is {os.environ['DATABRICKS_HOST']}") -print(f'[{payload["command"]}] command flags are {payload["flags"]}') 
+print(f"[{payload['command']}] command flags are {payload['flags']}") -answer = input('What is your name? ') +answer = input("What is your name? ") -print(f'Hello, {answer}!') +print(f"Hello, {answer}!") diff --git a/cmd/workspace/apps/apps.go b/cmd/workspace/apps/apps.go index a103ba7a8..f7c08ece1 100755 --- a/cmd/workspace/apps/apps.go +++ b/cmd/workspace/apps/apps.go @@ -78,6 +78,7 @@ func newCreate() *cobra.Command { // TODO: short flags cmd.Flags().Var(&createJson, "json", `either inline JSON string or @path/to/file.json with request body`) + cmd.Flags().BoolVar(&createReq.NoCompute, "no-compute", createReq.NoCompute, `If true, the app will not be started after creation.`) // TODO: complex arg: active_deployment // TODO: complex arg: app_status // TODO: complex arg: compute_status diff --git a/cmd/workspace/apps/overrides.go b/cmd/workspace/apps/overrides.go new file mode 100644 index 000000000..e14068717 --- /dev/null +++ b/cmd/workspace/apps/overrides.go @@ -0,0 +1,28 @@ +package apps + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func listOverride(listCmd *cobra.Command, listReq *apps.ListAppsRequest) { + listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "Name"}} {{header "Url"}} {{header "ComputeStatus"}} {{header "DeploymentStatus"}}`) + listCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.Name | green}} {{.Url}} {{if .ComputeStatus}}{{if eq .ComputeStatus.State "ACTIVE"}}{{green "%s" .ComputeStatus.State }}{{else}}{{blue "%s" .ComputeStatus.State}}{{end}}{{end}} {{if .ActiveDeployment}}{{if eq .ActiveDeployment.Status.State "SUCCEEDED"}}{{green "%s" .ActiveDeployment.Status.State }}{{else}}{{blue "%s" .ActiveDeployment.Status.State}}{{end}}{{end}} + {{end}}`) +} + +func listDeploymentsOverride(listDeploymentsCmd *cobra.Command, listDeploymentsReq *apps.ListAppDeploymentsRequest) { + listDeploymentsCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "DeploymentId"}} {{header "State"}} {{header "CreatedAt"}}`) + listDeploymentsCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.DeploymentId}} {{if eq .Status.State "SUCCEEDED"}}{{green "%s" .Status.State }}{{else}}{{blue "%s" .Status.State}}{{end}} {{.CreateTime}} + {{end}}`) +} + +func init() { + listOverrides = append(listOverrides, listOverride) + listDeploymentsOverrides = append(listDeploymentsOverrides, listDeploymentsOverride) +} diff --git a/cmd/workspace/jobs/jobs.go b/cmd/workspace/jobs/jobs.go index b067937e2..38a88f014 100755 --- a/cmd/workspace/jobs/jobs.go +++ b/cmd/workspace/jobs/jobs.go @@ -625,11 +625,19 @@ func newGet() *cobra.Command { // TODO: short flags + cmd.Flags().StringVar(&getReq.PageToken, "page-token", getReq.PageToken, `Use next_page_token returned from the previous GetJob to request the next page of the job's sub-resources.`) + cmd.Use = "get JOB_ID" cmd.Short = `Get a single job.` cmd.Long = `Get a single job. Retrieves the details for a single job. + + In Jobs API 2.2, requests for a single job support pagination of tasks and + job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: JOB_ID: The canonical identifier of the job to retrieve information about. 
This @@ -847,13 +855,19 @@ func newGetRun() *cobra.Command { cmd.Flags().BoolVar(&getRunReq.IncludeHistory, "include-history", getRunReq.IncludeHistory, `Whether to include the repair history in the response.`) cmd.Flags().BoolVar(&getRunReq.IncludeResolvedValues, "include-resolved-values", getRunReq.IncludeResolvedValues, `Whether to include resolved parameter values in the response.`) - cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `To list the next page of job tasks, set this field to the value of the next_page_token returned in the GetJob response.`) + cmd.Flags().StringVar(&getRunReq.PageToken, "page-token", getRunReq.PageToken, `Use next_page_token returned from the previous GetRun to request the next page of the run's sub-resources.`) cmd.Use = "get-run RUN_ID" cmd.Short = `Get a single job run.` cmd.Long = `Get a single job run. - Retrieve the metadata of a run. + Retrieves the metadata of a run. + + In Jobs API 2.2, requests for a single job run support pagination of tasks + and job_clusters when either exceeds 100 elements. Use the next_page_token + field to check for more results and pass its value as the page_token in + subsequent requests. Arrays with fewer than 100 elements in a page will be + empty on later pages. Arguments: RUN_ID: The canonical identifier of the run for which to retrieve the metadata. diff --git a/cmd/workspace/pipelines/pipelines.go b/cmd/workspace/pipelines/pipelines.go index 38636e83b..e94d4c5a8 100755 --- a/cmd/workspace/pipelines/pipelines.go +++ b/cmd/workspace/pipelines/pipelines.go @@ -974,6 +974,7 @@ func newUpdate() *cobra.Command { cmd.Flags().BoolVar(&updateReq.Photon, "photon", updateReq.Photon, `Whether Photon is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.PipelineId, "pipeline-id", updateReq.PipelineId, `Unique identifier for this pipeline.`) // TODO: complex arg: restart_window + // TODO: complex arg: run_as cmd.Flags().StringVar(&updateReq.Schema, "schema", updateReq.Schema, `The default schema (database) where tables are read from or published to.`) cmd.Flags().BoolVar(&updateReq.Serverless, "serverless", updateReq.Serverless, `Whether serverless compute is enabled for this pipeline.`) cmd.Flags().StringVar(&updateReq.Storage, "storage", updateReq.Storage, `DBFS root directory for storing checkpoints and tables.`) diff --git a/cmd/workspace/shares/shares.go b/cmd/workspace/shares/shares.go index f70963f29..62c3407f4 100755 --- a/cmd/workspace/shares/shares.go +++ b/cmd/workspace/shares/shares.go @@ -391,6 +391,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `User-provided free-form text description.`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the share.`) + cmd.Flags().StringVar(&updateReq.Owner, "owner", updateReq.Owner, `Username of current owner of share.`) cmd.Flags().StringVar(&updateReq.StorageRoot, "storage-root", updateReq.StorageRoot, `Storage root URL for the share.`) // TODO: array: updates diff --git a/go.mod b/go.mod index ed2ff12ad..0ef800d7b 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ toolchain go1.23.4 require ( github.com/Masterminds/semver/v3 v3.3.1 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.54.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.55.0 // Apache 2.0 github.com/fatih/color v1.18.0 // MIT github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 
2.0 diff --git a/go.sum b/go.sum index 2b9290b71..b1364cb26 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.54.0 h1:L8gsA3NXs+uYU3QtW/OUgjxMQxOH24k0MT9JhB3zLlM= -github.com/databricks/databricks-sdk-go v0.54.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.55.0 h1:ReziD6spzTDltM0ml80LggKo27F3oUjgTinCFDJDnak= +github.com/databricks/databricks-sdk-go v0.55.0/go.mod h1:JpLizplEs+up9/Z4Xf2x++o3sM9eTTWFGzIXAptKJzI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/integration/bundle/apps_test.go b/integration/bundle/apps_test.go index f15d8aabc..23cd784be 100644 --- a/integration/bundle/apps_test.go +++ b/integration/bundle/apps_test.go @@ -6,8 +6,10 @@ import ( "testing" "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" "github.com/databricks/databricks-sdk-go/service/apps" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -27,7 +29,7 @@ func TestDeployBundleWithApp(t *testing.T) { } uniqueId := uuid.New().String() - appId := "app-%s" + uuid.New().String()[0:8] + appId := "app-" + uuid.New().String()[0:8] nodeTypeId := testutil.GetCloud(t).NodeTypeID() instancePoolId := env.Get(ctx, "TEST_INSTANCE_POOL_ID") @@ -49,7 +51,31 @@ func TestDeployBundleWithApp(t *testing.T) { } }) - deployBundle(t, ctx, root) + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + + user, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/apps/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/apps/bundle_deploy.txt"), + ) // App should exists after bundle deployment app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) diff --git a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl index 4ebeb2655..4ea687cf1 100644 --- a/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl +++ b/integration/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -9,7 +9,6 @@ variables: description: The catalog the DLT pipeline should use. 
default: main - resources: pipelines: foo: @@ -19,6 +18,13 @@ resources: path: ./nb.sql development: true catalog: ${var.catalog} + target: ${resources.schemas.bar.id} + + schemas: + bar: + name: test-schema-{{.unique_id}} + catalog_name: ${var.catalog} + comment: This schema was created from DABs include: - "*.yml" diff --git a/integration/bundle/init_default_python_test.go b/integration/bundle/init_default_python_test.go index c93e6b50b..931660032 100644 --- a/integration/bundle/init_default_python_test.go +++ b/integration/bundle/init_default_python_test.go @@ -58,7 +58,10 @@ func testDefaultPython(t *testing.T, pythonVersion string) { require.NoError(t, err) require.NotNil(t, user) testdiff.PrepareReplacementsUser(t, replacements, *user) - testdiff.PrepareReplacements(t, replacements, wt.W) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) tmpDir := t.TempDir() testutil.Chdir(t, tmpDir) diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt new file mode 100644 index 000000000..b077f327d --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -0,0 +1,5 @@ +Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ/files... +Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning. +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt new file mode 100644 index 000000000..dc9016a0f --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ + +Validation OK! diff --git a/integration/bundle/testdata/default_python/bundle_init.txt b/integration/bundle/testdata/default_python/bundle_init.txt index 6cfc32f98..c2917ea4e 100644 --- a/integration/bundle/testdata/default_python/bundle_init.txt +++ b/integration/bundle/testdata/default_python/bundle_init.txt @@ -1,6 +1,6 @@ Welcome to the default Python template for Databricks Asset Bundles! -Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): https://$DATABRICKS_HOST +Workspace to use (auto-detected, edit in 'project_name_$UNIQUE_PRJ/databricks.yml'): $DATABRICKS_URL ✨ Your new project has been created in the 'project_name_$UNIQUE_PRJ' directory! 
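For context, the two integration tests touched above (apps_test.go and init_default_python_test.go) now follow the same golden-file setup. Below is a minimal sketch of that shared pattern, using only the helper calls visible in the hunks above; the wrapper name assertBundleOutput, its acc.WorkspaceT parameter, and the specific golden-file path are illustrative assumptions, not part of this change.

// Sketch only: mirrors the replacement/golden-output setup used in the hunks above.
// assertBundleOutput is a hypothetical wrapper; every helper call is taken from this diff.
package bundle_test

import (
	"context"
	"testing"

	"github.com/databricks/cli/integration/internal/acc"
	"github.com/databricks/cli/internal/testcli"
	"github.com/databricks/cli/internal/testutil"
	"github.com/databricks/cli/libs/testdiff"
	"github.com/stretchr/testify/require"
)

func assertBundleOutput(t *testing.T, ctx context.Context, wt *acc.WorkspaceT, root, uniqueId string) {
	// Register placeholders so golden files can use tokens such as $UNIQUE_PRJ, $USERNAME, $DATABRICKS_URL.
	ctx, replacements := testdiff.WithReplacementsMap(ctx)
	replacements.Set(uniqueId, "$UNIQUE_PRJ")

	user, err := wt.W.CurrentUser.Me(ctx)
	require.NoError(t, err)
	require.NotNil(t, user)
	testdiff.PrepareReplacementsUser(t, replacements, *user)
	testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W)
	testdiff.PrepareReplacementsUUID(t, replacements)
	testdiff.PrepareReplacementsNumber(t, replacements)
	testdiff.PrepareReplacementsTemporaryDirectory(t, replacements)

	// Run the CLI from the bundle root and compare its output against the checked-in golden file.
	testutil.Chdir(t, root)
	testcli.AssertOutput(t, ctx, []string{"bundle", "validate"}, testutil.TestData("testdata/apps/bundle_validate.txt"))
}

The golden files added in this change (bundle_validate.txt, bundle_deploy.txt) carry the placeholder tokens that these replacements substitute before comparison.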
diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index a0bcfdbc8..318cd2543 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -23,7 +23,7 @@ "resources/project_name_$UNIQUE_PRJ.pipeline.yml" ], "workspace": { - "host": "https://$DATABRICKS_HOST", + "host": "$DATABRICKS_URL", "current_user": { "active": true, "displayName": "$USERNAME", @@ -141,7 +141,7 @@ "unit": "DAYS" } }, - "url": "https://$DATABRICKS_HOST/jobs/?o=" + "url": "$DATABRICKS_URL/jobs/?o=" } }, "pipelines": { @@ -165,7 +165,7 @@ ], "name": "[dev $USERNAME] project_name_$UNIQUE_PRJ_pipeline", "target": "project_name_$UNIQUE_PRJ_dev", - "url": "https://$DATABRICKS_HOST/pipelines/?o=" + "url": "$DATABRICKS_URL/pipelines/?o=" } } }, diff --git a/integration/bundle/testdata/default_python/bundle_validate.txt b/integration/bundle/testdata/default_python/bundle_validate.txt index 88a5fdd18..578fd6494 100644 --- a/integration/bundle/testdata/default_python/bundle_validate.txt +++ b/integration/bundle/testdata/default_python/bundle_validate.txt @@ -1,7 +1,7 @@ Name: project_name_$UNIQUE_PRJ Target: dev Workspace: - Host: https://$DATABRICKS_HOST + Host: $DATABRICKS_URL User: $USERNAME Path: /Workspace/Users/$USERNAME/.bundle/project_name_$UNIQUE_PRJ/dev diff --git a/integration/cmd/fs/cat_test.go b/integration/cmd/fs/cat_test.go index 3e964fe6e..14ec8140e 100644 --- a/integration/cmd/fs/cat_test.go +++ b/integration/cmd/fs/cat_test.go @@ -18,13 +18,11 @@ func TestFsCat(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -40,13 +38,11 @@ func TestFsCatOnADir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "dir1") require.NoError(t, err) @@ -61,13 +57,11 @@ func TestFsCatOnNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cat", path.Join(tmpDir, "non-existent-file")) assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/cp_test.go b/integration/cmd/fs/cp_test.go index 76aef7acf..6d0266555 100644 --- a/integration/cmd/fs/cp_test.go +++ b/integration/cmd/fs/cp_test.go @@ -126,14 +126,12 @@ func TestFsCpDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, 
context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", sourceDir, targetDir, "--recursive") @@ -147,14 +145,12 @@ func TestFsCpFileToFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), path.Join(targetDir, "bar.txt")) @@ -168,14 +164,12 @@ func TestFsCpFileToDir(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceFile(t, context.Background(), sourceFiler) testcli.RequireSuccessfulRun(t, ctx, "fs", "cp", path.Join(sourceDir, "foo.txt"), targetDir) @@ -205,14 +199,12 @@ func TestFsCpDirToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -231,14 +223,12 @@ func TestFsCpFileToDirFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -255,14 +245,12 @@ func TestFsCpFileToFileFileNotOverwritten(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -279,14 +267,12 @@ func TestFsCpDirToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a 
conflicting file to target @@ -303,14 +289,12 @@ func TestFsCpFileToFileWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -327,14 +311,12 @@ func TestFsCpFileToDirWithOverwriteFlag(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target @@ -351,13 +333,11 @@ func TestFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "cp", path.Join(tmpDir), path.Join(tmpDir, "foobar")) r := regexp.MustCompile("source path .* is a directory. Please specify the --recursive flag") @@ -376,14 +356,12 @@ func TestFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { t.Parallel() for _, testCase := range copyTests() { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - sourceFiler, sourceDir := tc.setupSource(t) - targetFiler, targetDir := tc.setupTarget(t) + sourceFiler, sourceDir := testCase.setupSource(t) + targetFiler, targetDir := testCase.setupTarget(t) setupSourceDir(t, context.Background(), sourceFiler) // Write a conflicting file to target diff --git a/integration/cmd/fs/ls_test.go b/integration/cmd/fs/ls_test.go index 25929fdf3..0f53193bf 100644 --- a/integration/cmd/fs/ls_test.go +++ b/integration/cmd/fs/ls_test.go @@ -43,13 +43,11 @@ func TestFsLs(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") @@ -77,13 +75,11 @@ func TestFsLsWithAbsolutePaths(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json", "--absolute") @@ -111,13 +107,11 @@ func TestFsLsOnFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := 
context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) setupLsFiles(t, f) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "a", "hello.txt"), "--output=json") @@ -131,13 +125,11 @@ func TestFsLsOnEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "ls", tmpDir, "--output=json") assert.Equal(t, "", stderr.String()) @@ -155,13 +147,11 @@ func TestFsLsForNonexistingDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "ls", path.Join(tmpDir, "nonexistent"), "--output=json") assert.ErrorIs(t, err, fs.ErrNotExist) diff --git a/integration/cmd/fs/mkdir_test.go b/integration/cmd/fs/mkdir_test.go index eff0599a7..5cea0599c 100644 --- a/integration/cmd/fs/mkdir_test.go +++ b/integration/cmd/fs/mkdir_test.go @@ -17,13 +17,11 @@ func TestFsMkdir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a")) @@ -43,13 +41,11 @@ func TestFsMkdirCreatesIntermediateDirectories(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a/b/c" stdout, stderr := testcli.RequireSuccessfulRun(t, ctx, "fs", "mkdir", path.Join(tmpDir, "a", "b", "c")) @@ -81,13 +77,11 @@ func TestFsMkdirWhenDirectoryAlreadyExists(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // create directory "a" err := f.Mkdir(context.Background(), "a") diff --git a/integration/cmd/fs/rm_test.go b/integration/cmd/fs/rm_test.go index 018c7920e..fc19bb5b5 100644 --- a/integration/cmd/fs/rm_test.go +++ b/integration/cmd/fs/rm_test.go @@ -17,14 +17,12 @@ func TestFsRmFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a file ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Write(context.Background(), "hello.txt", strings.NewReader("abcd"), filer.CreateParentDirectories) require.NoError(t, err) @@ -48,14 +46,12 @@ func TestFsRmEmptyDir(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := 
context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -79,14 +75,12 @@ func TestFsRmNonEmptyDirectory(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() // Create a directory ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) err := f.Mkdir(context.Background(), "a") require.NoError(t, err) @@ -110,13 +104,11 @@ func TestFsRmForNonExistentFile(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - _, tmpDir := tc.setupFiler(t) + _, tmpDir := testCase.setupFiler(t) // Expect error if file does not exist _, _, err := testcli.RequireErrorRun(t, ctx, "fs", "rm", path.Join(tmpDir, "does-not-exist")) @@ -129,13 +121,11 @@ func TestFsRmDirRecursively(t *testing.T) { t.Parallel() for _, testCase := range fsTests { - tc := testCase - - t.Run(tc.name, func(t *testing.T) { + t.Run(testCase.name, func(t *testing.T) { t.Parallel() ctx := context.Background() - f, tmpDir := tc.setupFiler(t) + f, tmpDir := testCase.setupFiler(t) // Create a directory err := f.Mkdir(context.Background(), "a") diff --git a/integration/libs/filer/filer_test.go b/integration/libs/filer/filer_test.go index 21c839e1b..bc1713b30 100644 --- a/integration/libs/filer/filer_test.go +++ b/integration/libs/filer/filer_test.go @@ -128,11 +128,9 @@ func TestFilerRecursiveDelete(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. @@ -239,11 +237,9 @@ func TestFilerReadWrite(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() // Common tests we run across all filers to ensure consistent behavior. 
@@ -348,11 +344,9 @@ func TestFilerReadDir(t *testing.T) { {"files", setupUcVolumesFiler}, {"workspace files extensions", setupWsfsExtensionsFiler}, } { - tc := testCase - t.Run(testCase.name, func(t *testing.T) { t.Parallel() - f, _ := tc.f(t) + f, _ := testCase.f(t) ctx := context.Background() commonFilerReadDirTest(t, ctx, f) diff --git a/integration/libs/locker/locker_test.go b/integration/libs/locker/locker_test.go index 524996465..93cb1ffce 100644 --- a/integration/libs/locker/locker_test.go +++ b/integration/libs/locker/locker_test.go @@ -66,9 +66,8 @@ func TestLock(t *testing.T) { } var wg sync.WaitGroup - for i := range numConcurrentLocks { + for currentIndex := range numConcurrentLocks { wg.Add(1) - currentIndex := i go func() { defer wg.Done() time.Sleep(time.Duration(rand.Intn(100)) * time.Millisecond) diff --git a/internal/testcli/runner.go b/internal/testcli/runner.go index d32fa3947..f462f44fc 100644 --- a/internal/testcli/runner.go +++ b/internal/testcli/runner.go @@ -39,6 +39,8 @@ type Runner struct { StderrLines <-chan string errch <-chan error + + Verbose bool } func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string { @@ -139,7 +141,9 @@ func (r *Runner) RunBackground() { go func() { err := root.Execute(ctx, cli) if err != nil { - r.Logf("Error running command: %s", err) + if r.Verbose { + r.Logf("Error running command: %s", err) + } } // Close pipes to signal EOF. @@ -154,7 +158,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stdout.Bytes())) for scanner.Scan() { - r.Logf("[databricks stdout]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stdout]: %s", scanner.Text()) + } } } @@ -162,7 +168,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stderr.Bytes())) for scanner.Scan() { - r.Logf("[databricks stderr]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stderr]: %s", scanner.Text()) + } } } @@ -196,18 +204,24 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { cli.SetErr(&stderr) cli.SetArgs(r.args) - r.Logf(" args: %s", strings.Join(r.args, ", ")) + if r.Verbose { + r.Logf(" args: %s", strings.Join(r.args, ", ")) + } err := root.Execute(ctx, cli) if err != nil { - r.Logf(" error: %s", err) + if r.Verbose { + r.Logf(" error: %s", err) + } } if stdout.Len() > 0 { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stdout.Bytes())) for scanner.Scan() { - r.Logf("stdout: %s", scanner.Text()) + if r.Verbose { + r.Logf("stdout: %s", scanner.Text()) + } } } @@ -215,7 +229,9 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { // Make a copy of the buffer such that it remains "unread". 
scanner := bufio.NewScanner(bytes.NewBuffer(stderr.Bytes())) for scanner.Scan() { - r.Logf("stderr: %s", scanner.Text()) + if r.Verbose { + r.Logf("stderr: %s", scanner.Text()) + } } } @@ -275,8 +291,9 @@ func NewRunner(t testutil.TestingT, ctx context.Context, args ...string) *Runner return &Runner{ TestingT: t, - ctx: ctx, - args: args, + ctx: ctx, + args: args, + Verbose: true, } } diff --git a/libs/cmdio/logger.go b/libs/cmdio/logger.go index 7edad5bf0..48b76ce42 100644 --- a/libs/cmdio/logger.go +++ b/libs/cmdio/logger.go @@ -189,7 +189,7 @@ func (l *Logger) writeJson(event Event) { // we panic because there we cannot catch this in jobs.RunNowAndWait panic(err) } - _, _ = l.Writer.Write([]byte(b)) + _, _ = l.Writer.Write(b) _, _ = l.Writer.Write([]byte("\n")) } diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 0a867375d..a33ecd38e 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -81,7 +81,7 @@ func (v Value) AsInt() (int64, bool) { case int32: return int64(vv), true case int64: - return int64(vv), true + return vv, true default: return 0, false } diff --git a/libs/exec/exec_test.go b/libs/exec/exec_test.go index c363c1f7c..f245f9dd1 100644 --- a/libs/exec/exec_test.go +++ b/libs/exec/exec_test.go @@ -85,7 +85,7 @@ func testExecutorWithShell(t *testing.T, shell string) { // Create temporary directory with only the shell executable in the PATH. tmpDir := t.TempDir() - t.Setenv("PATH", tmpDir) + t.Setenv("PATH", fmt.Sprintf("%s%c%s", tmpDir, os.PathListSeparator, os.Getenv("PATH"))) if runtime.GOOS == "windows" { err = os.Symlink(p, fmt.Sprintf("%s/%s.exe", tmpDir, shell)) require.NoError(t, err) diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 98a534684..88bbadd32 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -303,8 +303,6 @@ func (w *FilesClient) recursiveDelete(ctx context.Context, name string) error { group.SetLimit(maxFilesRequestsInFlight) for _, file := range filesToDelete { - file := file - // Skip the file if the context has already been cancelled. select { case <-groupCtx.Done(): diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 40c850945..579cc1de3 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -47,7 +47,7 @@ func (f file) close() error { func (f file) readHeader() (string, error) { // Scan header line with some padding. 
buf := make([]byte, headerLength) - n, err := f.f.Read([]byte(buf)) + n, err := f.f.Read(buf) if err != nil && err != io.EOF { return "", err } diff --git a/libs/notebook/testdata/.ruff.toml b/libs/notebook/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/notebook/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/sync/testdata/.ruff.toml b/libs/sync/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/sync/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff0..d3e9beef3 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -29,7 +29,8 @@ "source": [ {{- if (eq .include_python "yes") }} "import sys\n", - "sys.path.append('../src')\n", + "\n", + "sys.path.append(\"../src\")\n", "from {{.project_name}} import main\n", "\n", "main.get_taxis(spark).show(10)" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl index a0852c725..e3b70c605 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl @@ -5,28 +5,32 @@ This file is primarily used by the setuptools library and typically should not be executed directly. See README.md for how to deploy, test, and run the {{.project_name}} project. """ + from setuptools import setup, find_packages import sys -sys.path.append('./src') + +sys.path.append("./src") import datetime import {{.project_name}} +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + setup( name="{{.project_name}}", # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) # to ensure that changes to wheel package are picked up when used on all-purpose clusters - version={{.project_name}}.__version__ + "+" + datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), + version={{.project_name}}.__version__ + "+" + local_version, url="https://databricks.com", author="{{user_name}}", description="wheel file based on {{.project_name}}/src", - packages=find_packages(where='./src'), - package_dir={'': 'src'}, + packages=find_packages(where="./src"), + package_dir={"": "src"}, entry_points={ "packages": [ - "main={{.project_name}}.main:main" - ] + "main={{.project_name}}.main:main", + ], }, install_requires=[ # Dependencies in case the output wheel file is used as a library dependency. 
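Several hunks above (libs/cmdio/logger.go, libs/dyn/value_underlying.go, libs/notebook/detect.go) remove no-op conversions such as `[]byte(buf)` and `int64(vv)`; these are exactly the casts the newly enabled `unconvert` linter reports. A small sketch, not taken from the patch, of the before/after shape of both cases:

```go
package example

import (
	"io"
	"os"
)

// readHeader reads up to n bytes from f.
// buf is already a []byte, so wrapping it as []byte(buf) (the pre-patch form)
// was a redundant conversion; passing it directly is equivalent.
func readHeader(f *os.File, n int) (string, error) {
	buf := make([]byte, n)
	read, err := f.Read(buf)
	if err != nil && err != io.EOF {
		return "", err
	}
	return string(buf[:read]), nil
}

// asInt64 shows the same idea for numeric types: in the int64 case vv already
// has the target type, so int64(vv) adds nothing.
func asInt64(v any) (int64, bool) {
	switch vv := v.(type) {
	case int32:
		return int64(vv), true
	case int64:
		return vv, true
	default:
		return 0, false
	}
}
```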
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index 253ed321c..d0286639f 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -35,6 +35,7 @@ "# Import DLT and src/{{.project_name}}\n", "import dlt\n", "import sys\n", + "\n", "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", "from pyspark.sql.functions import expr\n", "from {{.project_name}} import main" @@ -63,17 +64,18 @@ {{- if (eq .include_python "yes") }} "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis(spark)\n", + " return main.get_taxis(spark)\n", {{else}} "\n", "@dlt.view\n", "def taxi_raw():\n", - " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", {{end -}} "\n", + "\n", "@dlt.table\n", "def filtered_taxis():\n", - " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index c514c6dc5..5ae344c7e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -1,21 +1,25 @@ from pyspark.sql import SparkSession, DataFrame + def get_taxis(spark: SparkSession) -> DataFrame: - return spark.read.table("samples.nyctaxi.trips") + return spark.read.table("samples.nyctaxi.trips") # Create a new Databricks Connect session. If this fails, # check that you have configured Databricks Connect correctly. # See https://docs.databricks.com/dev-tools/databricks-connect.html. def get_spark() -> SparkSession: - try: - from databricks.connect import DatabricksSession - return DatabricksSession.builder.getOrCreate() - except ImportError: - return SparkSession.builder.getOrCreate() + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + def main(): - get_taxis(get_spark()).show(5) + get_taxis(get_spark()).show(5) -if __name__ == '__main__': - main() + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json new file mode 100644 index 000000000..00d59af5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -0,0 +1,28 @@ +{ + "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "jobs_as_code_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." 
+ }, + "include_notebook": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "order": 2 + }, + "include_python": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) Python package in '{{.project_name}}/src'", + "order": 3 + } + }, + "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." +} diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl new file mode 100644 index 000000000..7d0c88e7d --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl @@ -0,0 +1,7 @@ +{{define "latest_lts_dbr_version" -}} + 15.4.x-scala2.12 +{{- end}} + +{{define "latest_lts_db_connect_version_spec" -}} + >=15.4,<15.5 +{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl new file mode 100644 index 000000000..2f8e8ae3e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -0,0 +1,30 @@ +# Preamble + +This file only template directives; it is skipped for the actual output. + +{{skip "__preamble"}} + +# TODO add DLT support, placeholder for now +{{$notDLT := true }} +{{$notNotebook := not (eq .include_notebook "yes")}} +{{$notPython := not (eq .include_python "yes")}} + +{{if $notPython}} + {{skip "{{.project_name}}/src/{{.project_name}}"}} + {{skip "{{.project_name}}/tests/main_test.py"}} +{{end}} + +{{if $notDLT}} + {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} +{{end}} + +{{if $notNotebook}} + {{skip "{{.project_name}}/src/notebook.ipynb"}} +{{end}} + +{{if (and $notDLT $notNotebook $notPython)}} + {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} +{{else}} + {{skip "{{.project_name}}/resources/.gitkeep"}} +{{end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 000000000..497ce3723 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,60 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. 
See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + {{- end}} + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..758ec3f16 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,51 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +{{ if .include_python -}} +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +{{ end -}} +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. 
+ # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 000000000..ee9570302 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl new file mode 100644 index 000000000..cee0d8946 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "{{.project_name}}" +requires-python = ">=3.10" +description = "wheel file based on {{.project_name}}" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +{{ if .include_python -}} +[project.entry-points.packages] +main = "{{.project_name}}.main:main" + +{{ end -}} + +[tool.setuptools] +{{ if .include_python -}} +py-modules = ["resources", "{{.project_name}}"] + +{{ else }} +py-modules = ["resources"] + +{{ end -}} +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. 
+ ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl new file mode 100644 index 000000000..7c7a0d33f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -0,0 +1,108 @@ +{{$include_dlt := "no" -}} +from databricks.bundles.jobs import Job + +""" +The main job for {{.project_name}}. + +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +This job runs {{.project_name}}_pipeline on a schedule. 
+{{end -}} +""" + + +{{.project_name}}_job = Job.from_dict( + { + "name": "{{.project_name}}_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + {{- if not is_service_principal}} + "email_notifications": { + "on_failure": [ + "{{user_name}}", + ], + }, + {{else}} + {{- end -}} + "tasks": [ + {{- if eq .include_notebook "yes" -}} + {{- "\n " -}} + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + {{- end -}} + {{- if (eq $include_dlt "yes") -}} + {{- "\n " -}} + { + "task_key": "refresh_pipeline", + {{- if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "pipeline_task": { + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} + "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", + }, + }, + {{- end -}} + {{- if (eq .include_python "yes") -}} + {{- "\n " -}} + { + "task_key": "main_task", + {{- if (eq $include_dlt "yes") }} + "depends_on": [ + { + "task_key": "refresh_pipeline", + }, + ], + {{- else if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "{{.project_name}}", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the {{.project_name}} package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. 
+ { + "whl": "dist/*.whl", + }, + ], + }, + {{- end -}} + {{""}} + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "{{template "latest_lts_dbr_version"}}", + "node_type_id": "{{smallest_node_type}}", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl new file mode 100644 index 000000000..c8579ae65 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -0,0 +1,24 @@ +from databricks.bundles.pipelines import Pipeline + +{{.project_name}}_pipeline = Pipeline.from_dict( + { + "name": "{{.project_name}}_pipeline", + "target": "{{.project_name}}_${bundle.target}", + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + "catalog": "catalog_name", + {{- else}} + "catalog": "{{default_catalog}}", + {{- end}} + "libraries": [ + { + "notebook": { + "path": "src/dlt_pipeline.ipynb", + }, + }, + ], + "configuration": { + "bundle.sourcePath": "${workspace.file_path}/src", + }, + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 000000000..19c9d0ebe --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the {{.project_name}} project. 
+""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl new file mode 100644 index 000000000..629106dbf --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "# Import DLT and src/{{.project_name}}\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from {{.project_name}} import main" + {{else}} + "import dlt\n", + "from pyspark.sql.functions import expr\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + {{end -}} + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + {{else}} + "@dlt.view\n", + "def taxi_raw():\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + {{end -}} + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 000000000..6782a053b --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": 
"" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl new file mode 100644 index 000000000..e69de29bb diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 000000000..6f89fca53 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,8 @@ +from {{.project_name}}.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. 
by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/libs/testdiff/context.go b/libs/testdiff/context.go new file mode 100644 index 000000000..7b6f5ff88 --- /dev/null +++ b/libs/testdiff/context.go @@ -0,0 +1,34 @@ +package testdiff + +import ( + "context" +) + +type key int + +const ( + replacementsMapKey = key(1) +) + +func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return ctx, existingMap + } + } + + newMap := &ReplacementsContext{} + ctx = context.WithValue(ctx, replacementsMapKey, newMap) + return ctx, newMap +} + +func GetReplacementsMap(ctx context.Context) *ReplacementsContext { + value := ctx.Value(replacementsMapKey) + if value != nil { + if existingMap, ok := value.(*ReplacementsContext); ok { + return existingMap + } + } + return nil +} diff --git a/libs/testdiff/context_test.go b/libs/testdiff/context_test.go new file mode 100644 index 000000000..5a0191009 --- /dev/null +++ b/libs/testdiff/context_test.go @@ -0,0 +1,30 @@ +package testdiff + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGetReplacementsMap_Nil(t *testing.T) { + ctx := context.Background() + repls := GetReplacementsMap(ctx) + assert.Nil(t, repls) +} + +func TestGetReplacementsMap_NotNil(t *testing.T) { + ctx := context.Background() + ctx, _ = WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.NotNil(t, repls) +} + +func TestWithReplacementsMap_UseExisting(t *testing.T) { + ctx := context.Background() + ctx, r1 := WithReplacementsMap(ctx) + ctx, r2 := WithReplacementsMap(ctx) + repls := GetReplacementsMap(ctx) + assert.Equal(t, r1, repls) + assert.Equal(t, r2, repls) +} diff --git a/libs/testdiff/golden.go b/libs/testdiff/golden.go index 08d1e9608..c1c51b6c5 100644 --- a/libs/testdiff/golden.go +++ b/libs/testdiff/golden.go @@ -3,17 +3,11 @@ package testdiff import ( "context" "flag" - "fmt" "os" - "regexp" - "slices" "strings" "testing" "github.com/databricks/cli/internal/testutil" - "github.com/databricks/cli/libs/iamutil" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) @@ -71,12 +65,6 @@ func AssertOutputJQ(t testutil.TestingT, ctx context.Context, out, outTitle, exp } } -var ( - uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) - numIdRegex = regexp.MustCompile(`[0-9]{3,}`) - privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) -) - func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string { t.Helper() out = NormalizeNewlines(out) @@ -84,136 +72,7 @@ func ReplaceOutput(t testutil.TestingT, ctx context.Context, out string) string if replacements == nil { t.Fatal("WithReplacementsMap was not called") } - out = replacements.Replace(out) - out = uuidRegex.ReplaceAllString(out, "") - out = numIdRegex.ReplaceAllString(out, "") - out = privatePathRegex.ReplaceAllString(out, "/tmp/.../$3") - - return out -} - -type key int - -const ( - replacementsMapKey = key(1) -) - -type Replacement struct { - Old string - New string -} - -type ReplacementsContext struct { - Repls []Replacement -} - -func (r *ReplacementsContext) Replace(s string) string { - // QQQ Should probably only replace whole words - for _, repl := range r.Repls { - 
s = strings.ReplaceAll(s, repl.Old, repl.New) - } - return s -} - -func (r *ReplacementsContext) Set(old, new string) { - if old == "" || new == "" { - return - } - r.Repls = append(r.Repls, Replacement{Old: old, New: new}) -} - -func WithReplacementsMap(ctx context.Context) (context.Context, *ReplacementsContext) { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return ctx, existingMap - } - } - - newMap := &ReplacementsContext{} - ctx = context.WithValue(ctx, replacementsMapKey, newMap) - return ctx, newMap -} - -func GetReplacementsMap(ctx context.Context) *ReplacementsContext { - value := ctx.Value(replacementsMapKey) - if value != nil { - if existingMap, ok := value.(*ReplacementsContext); ok { - return existingMap - } - } - return nil -} - -func PrepareReplacements(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { - t.Helper() - // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) - host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") - r.Set(host, "$DATABRICKS_HOST") - r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") - r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") - r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") - r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") - r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") - r.Set(w.Config.Token, "$DATABRICKS_TOKEN") - r.Set(w.Config.Username, "$DATABRICKS_USERNAME") - r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") - r.Set(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") - r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") - r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") - r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") - r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") - r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") - // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") - r.Set(w.Config.AzureClientID, "$USERNAME") - r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") - r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") - r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") - r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") - r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") - r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") - r.Set(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") - // This is set to words like "path" that happen too frequently - // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") -} - -func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { - t.Helper() - // There could be exact matches or overlap between different name fields, so sort them by length - // to ensure we match the largest one first and map them all to the same token - names := []string{ - u.DisplayName, - u.UserName, - iamutil.GetShortUserName(&u), - } - if u.Name != nil { - names = append(names, u.Name.FamilyName) - names = append(names, u.Name.GivenName) - } - for _, val := range u.Emails { - names = append(names, val.Value) - } - stableSortReverseLength(names) - - for _, name := range names { - r.Set(name, "$USERNAME") - } - - for ind, val := range u.Groups { - r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) - } - - r.Set(u.Id, "$USER.Id") - - for ind, val := range u.Roles { - r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) - } -} - -func 
stableSortReverseLength(strs []string) { - slices.SortStableFunc(strs, func(a, b string) int { - return len(b) - len(a) - }) + return replacements.Replace(out) } func NormalizeNewlines(input string) string { diff --git a/libs/testdiff/golden_test.go b/libs/testdiff/golden_test.go deleted file mode 100644 index 0fc32be21..000000000 --- a/libs/testdiff/golden_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package testdiff - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSort(t *testing.T) { - input := []string{"a", "bc", "cd"} - stableSortReverseLength(input) - assert.Equal(t, []string{"bc", "cd", "a"}, input) -} diff --git a/libs/testdiff/replacement.go b/libs/testdiff/replacement.go new file mode 100644 index 000000000..1ab976109 --- /dev/null +++ b/libs/testdiff/replacement.go @@ -0,0 +1,155 @@ +package testdiff + +import ( + "encoding/json" + "fmt" + "regexp" + "strings" + + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/iamutil" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/iam" +) + +const ( + testerName = "$USERNAME" +) + +var ( + uuidRegex = regexp.MustCompile(`[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}`) + numIdRegex = regexp.MustCompile(`[0-9]{3,}`) + privatePathRegex = regexp.MustCompile(`(/tmp|/private)(/.*)/([a-zA-Z0-9]+)`) +) + +type Replacement struct { + Old *regexp.Regexp + New string +} + +type ReplacementsContext struct { + Repls []Replacement +} + +func (r *ReplacementsContext) Replace(s string) string { + // QQQ Should probably only replace whole words + for _, repl := range r.Repls { + s = repl.Old.ReplaceAllString(s, repl.New) + } + return s +} + +func (r *ReplacementsContext) append(pattern *regexp.Regexp, replacement string) { + r.Repls = append(r.Repls, Replacement{ + Old: pattern, + New: replacement, + }) +} + +func (r *ReplacementsContext) appendLiteral(old, new string) { + r.append( + // Transform the input strings such that they can be used as literal strings in regular expressions. + regexp.MustCompile(regexp.QuoteMeta(old)), + // Transform the replacement string such that `$` is interpreted as a literal dollar sign. + // For more information about how the replacement string is used, see [regexp.Regexp.Expand]. + strings.ReplaceAll(new, `$`, `$$`), + ) +} + +func (r *ReplacementsContext) Set(old, new string) { + if old == "" || new == "" { + return + } + + // Always include both verbatim and json version of replacement. + // This helps when the string in question contains \ or other chars that need to be quoted. + // In that case we cannot rely that json(old) == '"{old}"' and need to add it explicitly. 
+ + encodedNew, err := json.Marshal(new) + if err == nil { + encodedOld, err := json.Marshal(old) + if err == nil { + r.appendLiteral(string(encodedOld), string(encodedNew)) + } + } + + r.appendLiteral(old, new) +} + +func PrepareReplacementsWorkspaceClient(t testutil.TestingT, r *ReplacementsContext, w *databricks.WorkspaceClient) { + t.Helper() + // in some clouds (gcp) w.Config.Host includes "https://" prefix in others it's really just a host (azure) + host := strings.TrimPrefix(strings.TrimPrefix(w.Config.Host, "http://"), "https://") + r.Set("https://"+host, "$DATABRICKS_URL") + r.Set("http://"+host, "$DATABRICKS_URL") + r.Set(host, "$DATABRICKS_HOST") + r.Set(w.Config.ClusterID, "$DATABRICKS_CLUSTER_ID") + r.Set(w.Config.WarehouseID, "$DATABRICKS_WAREHOUSE_ID") + r.Set(w.Config.ServerlessComputeID, "$DATABRICKS_SERVERLESS_COMPUTE_ID") + r.Set(w.Config.MetadataServiceURL, "$DATABRICKS_METADATA_SERVICE_URL") + r.Set(w.Config.AccountID, "$DATABRICKS_ACCOUNT_ID") + r.Set(w.Config.Token, "$DATABRICKS_TOKEN") + r.Set(w.Config.Username, "$DATABRICKS_USERNAME") + r.Set(w.Config.Password, "$DATABRICKS_PASSWORD") + r.Set(w.Config.Profile, "$DATABRICKS_CONFIG_PROFILE") + r.Set(w.Config.ConfigFile, "$DATABRICKS_CONFIG_FILE") + r.Set(w.Config.GoogleServiceAccount, "$DATABRICKS_GOOGLE_SERVICE_ACCOUNT") + r.Set(w.Config.GoogleCredentials, "$GOOGLE_CREDENTIALS") + r.Set(w.Config.AzureResourceID, "$DATABRICKS_AZURE_RESOURCE_ID") + r.Set(w.Config.AzureClientSecret, "$ARM_CLIENT_SECRET") + // r.Set(w.Config.AzureClientID, "$ARM_CLIENT_ID") + r.Set(w.Config.AzureClientID, testerName) + r.Set(w.Config.AzureTenantID, "$ARM_TENANT_ID") + r.Set(w.Config.ActionsIDTokenRequestURL, "$ACTIONS_ID_TOKEN_REQUEST_URL") + r.Set(w.Config.ActionsIDTokenRequestToken, "$ACTIONS_ID_TOKEN_REQUEST_TOKEN") + r.Set(w.Config.AzureEnvironment, "$ARM_ENVIRONMENT") + r.Set(w.Config.ClientID, "$DATABRICKS_CLIENT_ID") + r.Set(w.Config.ClientSecret, "$DATABRICKS_CLIENT_SECRET") + r.Set(w.Config.DatabricksCliPath, "$DATABRICKS_CLI_PATH") + // This is set to words like "path" that happen too frequently + // r.Set(w.Config.AuthType, "$DATABRICKS_AUTH_TYPE") +} + +func PrepareReplacementsUser(t testutil.TestingT, r *ReplacementsContext, u iam.User) { + t.Helper() + // There could be exact matches or overlap between different name fields, so sort them by length + // to ensure we match the largest one first and map them all to the same token + + r.Set(u.UserName, testerName) + r.Set(u.DisplayName, testerName) + if u.Name != nil { + r.Set(u.Name.FamilyName, testerName) + r.Set(u.Name.GivenName, testerName) + } + + for _, val := range u.Emails { + r.Set(val.Value, testerName) + } + + r.Set(iamutil.GetShortUserName(&u), testerName) + + for ind, val := range u.Groups { + r.Set(val.Value, fmt.Sprintf("$USER.Groups[%d]", ind)) + } + + r.Set(u.Id, "$USER.Id") + + for ind, val := range u.Roles { + r.Set(val.Value, fmt.Sprintf("$USER.Roles[%d]", ind)) + } +} + +func PrepareReplacementsUUID(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(uuidRegex, "") +} + +func PrepareReplacementsNumber(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(numIdRegex, "") +} + +func PrepareReplacementsTemporaryDirectory(t testutil.TestingT, r *ReplacementsContext) { + t.Helper() + r.append(privatePathRegex, "/tmp/.../$3") +} diff --git a/libs/testdiff/replacement_test.go b/libs/testdiff/replacement_test.go new file mode 100644 index 000000000..de247c03e --- /dev/null +++ b/libs/testdiff/replacement_test.go @@ -0,0 +1,46 @@ 
+package testdiff + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestReplacement_Literal(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foobar`, `[replacement]`) + assert.Equal(t, `[replacement]`, repls.Replace(`foobar`)) +} + +func TestReplacement_Encoded(t *testing.T) { + var repls ReplacementsContext + + repls.Set(`foo"bar`, `[replacement]`) + assert.Equal(t, `"[replacement]"`, repls.Replace(`"foo\"bar"`)) +} + +func TestReplacement_UUID(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsUUID(t, &repls) + + assert.Equal(t, "", repls.Replace("123e4567-e89b-12d3-a456-426614174000")) +} + +func TestReplacement_Number(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsNumber(t, &repls) + + assert.Equal(t, "12", repls.Replace("12")) + assert.Equal(t, "", repls.Replace("123")) +} + +func TestReplacement_TemporaryDirectory(t *testing.T) { + var repls ReplacementsContext + + PrepareReplacementsTemporaryDirectory(t, &repls) + + assert.Equal(t, "/tmp/.../tail", repls.Replace("/tmp/foo/bar/qux/tail")) +} diff --git a/libs/testdiff/testdiff.go b/libs/testdiff/testdiff.go index fef1d5ae2..f65adf7f7 100644 --- a/libs/testdiff/testdiff.go +++ b/libs/testdiff/testdiff.go @@ -17,18 +17,20 @@ func UnifiedDiff(filename1, filename2, s1, s2 string) string { return fmt.Sprint(gotextdiff.ToUnified(filename1, filename2, s1, edits)) } -func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) { +func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) bool { t.Helper() if len(out) < 1000 && len(expected) < 1000 { // This shows full strings + diff which could be useful when debugging newlines - assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) + return assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) } else { // only show diff for large texts diff := UnifiedDiff(filename1, filename2, expected, out) if diff != "" { - t.Errorf("Diff:\n" + diff) + t.Error("Diff:\n" + diff) + return false } } + return true } func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) {
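Taken together, the new context.go and replacement.go split the replacement machinery out of golden.go: the ReplacementsContext now travels on the context.Context and applies regexp-based replacements. A hedged sketch of how a test might wire these pieces together; the test name and input string are illustrative, and the assertion only checks that the raw UUID is scrubbed rather than assuming a specific replacement token:

```go
package example

import (
	"context"
	"strings"
	"testing"

	"github.com/databricks/cli/libs/testdiff"
)

func TestScrubbedOutput(t *testing.T) {
	// Attach a ReplacementsContext to the context; a second call returns the
	// same instance (see TestWithReplacementsMap_UseExisting above).
	ctx, repls := testdiff.WithReplacementsMap(context.Background())

	// Register the generic scrubbers for values that differ between runs.
	testdiff.PrepareReplacementsUUID(t, repls)
	testdiff.PrepareReplacementsNumber(t, repls)
	testdiff.PrepareReplacementsTemporaryDirectory(t, repls)

	// ReplaceOutput normalizes newlines and applies the replacements that
	// were registered on ctx.
	out := testdiff.ReplaceOutput(t, ctx, "run 123e4567-e89b-12d3-a456-426614174000 finished\r\n")
	if strings.Contains(out, "123e4567") {
		t.Errorf("expected UUID to be scrubbed, got %q", out)
	}
}
```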