diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index d998224a4..d3363b7e3 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -60,6 +60,12 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0 + - name: Run ruff + uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1 + with: + version: "0.9.1" + args: "format --check" + - name: Set go env run: | echo "GOPATH=$(go env GOPATH)" >> $GITHUB_ENV diff --git a/.gitignore b/.gitignore index 274c1341a..2060b6bac 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ __pycache__ .vscode/tasks.json .databricks +.ruff_cache diff --git a/.golangci.yaml b/.golangci.yaml index ea6d65db1..8a83135ee 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -22,6 +22,13 @@ linters-settings: disable: - fieldalignment - shadow + settings: + printf: + funcs: + - (github.com/databricks/cli/internal/testutil.TestingT).Infof + - (github.com/databricks/cli/internal/testutil.TestingT).Errorf + - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf + - (github.com/databricks/cli/internal/testutil.TestingT).Skipf gofmt: rewrite-rules: - pattern: 'a[b:len(a)]' diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b59fa540..53392e5db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,49 @@ # Version changelog +## [Release] Release v0.239.0 + +### New feature announcement + +#### Databricks Apps support + +You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration. +For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app + +#### Referencing complex variables in complex variables + +You can now reference complex variables within other complex variables. +For more details see https://github.com/databricks/cli/pull/2157 + +CLI: + * Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)). + * Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)). + +Bundles: + * Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)). + * Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)). + * Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)). + * Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)). + * Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)). + * Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)). + * Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)). + * Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)). + * Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)). + * Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)). + +API Changes: + * Changed `databricks account federation-policy update` command with new required argument order. + * Changed `databricks account service-principal-federation-policy update` command with new required argument order. + +OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08) +Dependency updates: + * Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)). + * Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)). + * Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)). + * Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)). + * Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)). + * Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)). + * Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)). + ## [Release] Release v0.238.0 Bundles: diff --git a/Makefile b/Makefile index 37cf5567c..4b66c9254 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ lintcheck: # formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'. # If you need to ensure that formatting & imports are always fixed, do "make fmt lint" fmt: + ruff format -q golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./... test: @@ -44,7 +45,7 @@ snapshot: vendor: go mod vendor - + schema: go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index e6eb380a4..5f1181313 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" "slices" "sort" @@ -89,6 +90,7 @@ func TestAccept(t *testing.T) { require.NotNil(t, user) testdiff.PrepareReplacementsUser(t, &repls, *user) testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient) + testdiff.PrepareReplacementsUUID(t, &repls) testDirs := getTests(t) require.NotEmpty(t, testDirs) @@ -154,70 +156,86 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont require.NoError(t, err) cmd.Env = append(os.Environ(), "GOCOVERDIR="+coverDir) } + + // Write combined output to a file + out, err := os.Create(filepath.Join(tmpDir, "output.txt")) + require.NoError(t, err) + cmd.Stdout = out + cmd.Stderr = out cmd.Dir = tmpDir - outB, err := cmd.CombinedOutput() + err = cmd.Run() - out := formatOutput(string(outB), err) - out = repls.Replace(out) - doComparison(t, filepath.Join(dir, "output.txt"), "script output", out) + // Include exit code in output (if non-zero) + formatOutput(out, err) + require.NoError(t, out.Close()) - for key := range outputs { - if key == "output.txt" { - // handled above - continue - } - pathNew := filepath.Join(tmpDir, key) - newValBytes, err := os.ReadFile(pathNew) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir) - } else { - t.Errorf("%s: could not read: %s", key, err) - } - continue - } - pathExpected := filepath.Join(dir, key) - newVal := repls.Replace(string(newValBytes)) - doComparison(t, pathExpected, pathNew, newVal) + // Compare expected outputs + for relPath := range outputs { + doComparison(t, repls, dir, tmpDir, relPath) } // Make sure there are not unaccounted for new files - files, err := os.ReadDir(tmpDir) + files, err := ListDir(t, tmpDir) require.NoError(t, err) - - for _, f := range files { - name := f.Name() - if _, ok := inputs[name]; ok { + for _, relPath := range files { + if _, ok := inputs[relPath]; ok { continue } - if _, ok := outputs[name]; ok { + if _, ok := outputs[relPath]; ok { continue } - t.Errorf("Unexpected output: %s", f) - if strings.HasPrefix(name, "out") { + if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" // Show the contents & support overwrite mode for it: - pathNew := filepath.Join(tmpDir, name) - newVal := testutil.ReadFile(t, pathNew) - newVal = repls.Replace(newVal) - doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal) + doComparison(t, repls, dir, tmpDir, relPath) } } } -func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) { - valueNew = testdiff.NormalizeNewlines(valueNew) - valueExpected := string(readIfExists(t, pathExpected)) - valueExpected = testdiff.NormalizeNewlines(valueExpected) - testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew) - if testdiff.OverwriteMode { - if valueNew != "" { - t.Logf("Overwriting: %s", pathExpected) - testutil.WriteFile(t, pathExpected, valueNew) - } else { - t.Logf("Removing: %s", pathExpected) - _ = os.Remove(pathExpected) +func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string) { + pathRef := filepath.Join(dirRef, relPath) + pathNew := filepath.Join(dirNew, relPath) + bufRef, okRef := readIfExists(t, pathRef) + bufNew, okNew := readIfExists(t, pathNew) + if !okRef && !okNew { + t.Errorf("Both files are missing: %s, %s", pathRef, pathNew) + return + } + + valueRef := testdiff.NormalizeNewlines(string(bufRef)) + valueNew := testdiff.NormalizeNewlines(string(bufNew)) + + // Apply replacements to the new value only. + // The reference value is stored after applying replacements. + valueNew = repls.Replace(valueNew) + + // The test did not produce an expected output file. + if okRef && !okNew { + t.Errorf("Missing output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Removing output file: %s", relPath) + require.NoError(t, os.Remove(pathRef)) } + return + } + + // The test produced an unexpected output file. + if !okRef && okNew { + t.Errorf("Unexpected output file: %s", relPath) + testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if testdiff.OverwriteMode { + t.Logf("Writing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) + } + return + } + + // Compare the reference and new values. + equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew) + if !equal && testdiff.OverwriteMode { + t.Logf("Overwriting existing output file: %s", relPath) + testutil.WriteFile(t, pathRef, valueNew) } } @@ -234,13 +252,13 @@ func readMergedScriptContents(t *testing.T, dir string) string { cleanups := []string{} for { - x := readIfExists(t, filepath.Join(dir, CleanupScript)) - if len(x) > 0 { + x, ok := readIfExists(t, filepath.Join(dir, CleanupScript)) + if ok { cleanups = append(cleanups, string(x)) } - x = readIfExists(t, filepath.Join(dir, PrepareScript)) - if len(x) > 0 { + x, ok = readIfExists(t, filepath.Join(dir, PrepareScript)) + if ok { prepares = append(prepares, string(x)) } @@ -316,29 +334,28 @@ func copyFile(src, dst string) error { return err } -func formatOutput(out string, err error) string { +func formatOutput(w io.Writer, err error) { if err == nil { - return out + return } if exiterr, ok := err.(*exec.ExitError); ok { exitCode := exiterr.ExitCode() - out += fmt.Sprintf("\nExit code: %d\n", exitCode) + fmt.Fprintf(w, "\nExit code: %d\n", exitCode) } else { - out += fmt.Sprintf("\nError: %s\n", err) + fmt.Fprintf(w, "\nError: %s\n", err) } - return out } -func readIfExists(t *testing.T, path string) []byte { +func readIfExists(t *testing.T, path string) ([]byte, bool) { data, err := os.ReadFile(path) if err == nil { - return data + return data, true } if !errors.Is(err, os.ErrNotExist) { t.Fatalf("%s: %s", path, err) } - return []byte{} + return []byte{}, false } func CopyDir(src, dst string, inputs, outputs map[string]bool) error { @@ -353,8 +370,10 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return err } - if strings.HasPrefix(name, "out") { - outputs[relPath] = true + if strings.HasPrefix(relPath, "out") { + if !info.IsDir() { + outputs[relPath] = true + } return nil } else { inputs[relPath] = true @@ -373,3 +392,47 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error { return copyFile(path, destPath) }) } + +func ListDir(t *testing.T, src string) ([]string, error) { + // exclude folders in .gitignore from comparison + ignored := []string{ + "\\.ruff_cache", + "\\.venv", + ".*\\.egg-info", + "__pycache__", + // depends on uv version + "uv.lock", + } + + var files []string + err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + for _, ignoredFolder := range ignored { + if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched { + return filepath.SkipDir + } + } + + return nil + } else { + for _, ignoredFolder := range ignored { + if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched { + return nil + } + } + } + + relPath, err := filepath.Rel(src, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + return files, err +} diff --git a/acceptance/bin/sort_blocks.py b/acceptance/bin/sort_blocks.py index f50c6f50f..d558f252a 100755 --- a/acceptance/bin/sort_blocks.py +++ b/acceptance/bin/sort_blocks.py @@ -4,6 +4,7 @@ Helper to sort blocks in text file. A block is a set of lines separated from oth This is to workaround non-determinism in the output. """ + import sys blocks = [] @@ -11,10 +12,10 @@ blocks = [] for line in sys.stdin: if not line.strip(): if blocks and blocks[-1]: - blocks.append('') + blocks.append("") continue if not blocks: - blocks.append('') + blocks.append("") blocks[-1] += line blocks.sort() diff --git a/acceptance/bundle/help/bundle-deploy/output.txt b/acceptance/bundle/help/bundle-deploy/output.txt new file mode 100644 index 000000000..13c903f3e --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle deploy --help +Deploy bundle + +Usage: + databricks bundle deploy [flags] + +Flags: + --auto-approve Skip interactive approvals that might be required for deployment. + -c, --cluster-id string Override cluster in the deployment with the given cluster ID. + --fail-on-active-runs Fail if there are running jobs or pipelines in the deployment. + --force Force-override Git branch validation. + --force-lock Force acquisition of deployment lock. + -h, --help help for deploy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-deploy/script b/acceptance/bundle/help/bundle-deploy/script new file mode 100644 index 000000000..6375cfea3 --- /dev/null +++ b/acceptance/bundle/help/bundle-deploy/script @@ -0,0 +1 @@ +trace $CLI bundle deploy --help diff --git a/acceptance/bundle/help/bundle-deployment/output.txt b/acceptance/bundle/help/bundle-deployment/output.txt new file mode 100644 index 000000000..ddf5b3305 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/output.txt @@ -0,0 +1,22 @@ + +>>> $CLI bundle deployment --help +Deployment related commands + +Usage: + databricks bundle deployment [command] + +Available Commands: + bind Bind bundle-defined resources to existing resources + unbind Unbind bundle-defined resources from its managed remote resource + +Flags: + -h, --help help for deployment + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle deployment [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-deployment/script b/acceptance/bundle/help/bundle-deployment/script new file mode 100644 index 000000000..ef93f7dc2 --- /dev/null +++ b/acceptance/bundle/help/bundle-deployment/script @@ -0,0 +1 @@ +trace $CLI bundle deployment --help diff --git a/acceptance/bundle/help/bundle-destroy/output.txt b/acceptance/bundle/help/bundle-destroy/output.txt new file mode 100644 index 000000000..d70164301 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle destroy --help +Destroy deployed bundle resources + +Usage: + databricks bundle destroy [flags] + +Flags: + --auto-approve Skip interactive approvals for deleting resources and files + --force-lock Force acquisition of deployment lock. + -h, --help help for destroy + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-destroy/script b/acceptance/bundle/help/bundle-destroy/script new file mode 100644 index 000000000..955d7b7f9 --- /dev/null +++ b/acceptance/bundle/help/bundle-destroy/script @@ -0,0 +1 @@ +trace $CLI bundle destroy --help diff --git a/acceptance/bundle/help/bundle-generate-dashboard/output.txt b/acceptance/bundle/help/bundle-generate-dashboard/output.txt new file mode 100644 index 000000000..a63ce0ff8 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/output.txt @@ -0,0 +1,24 @@ + +>>> $CLI bundle generate dashboard --help +Generate configuration for a dashboard + +Usage: + databricks bundle generate dashboard [flags] + +Flags: + -s, --dashboard-dir string directory to write the dashboard representation to (default "src") + --existing-id string ID of the dashboard to generate configuration for + --existing-path string workspace path of the dashboard to generate configuration for + -f, --force force overwrite existing files in the output directory + -h, --help help for dashboard + --resource string resource key of dashboard to watch for changes + -d, --resource-dir string directory to write the configuration to (default "resources") + --watch watch for changes to the dashboard and update the configuration + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-dashboard/script b/acceptance/bundle/help/bundle-generate-dashboard/script new file mode 100644 index 000000000..320156129 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-dashboard/script @@ -0,0 +1 @@ +trace $CLI bundle generate dashboard --help diff --git a/acceptance/bundle/help/bundle-generate-job/output.txt b/acceptance/bundle/help/bundle-generate-job/output.txt new file mode 100644 index 000000000..adc3f45ae --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate job --help +Generate bundle configuration for a job + +Usage: + databricks bundle generate job [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-job-id int Job ID of the job to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for job + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-job/script b/acceptance/bundle/help/bundle-generate-job/script new file mode 100644 index 000000000..109ed59aa --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-job/script @@ -0,0 +1 @@ +trace $CLI bundle generate job --help diff --git a/acceptance/bundle/help/bundle-generate-pipeline/output.txt b/acceptance/bundle/help/bundle-generate-pipeline/output.txt new file mode 100644 index 000000000..cf5f70920 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/output.txt @@ -0,0 +1,21 @@ + +>>> $CLI bundle generate pipeline --help +Generate bundle configuration for a pipeline + +Usage: + databricks bundle generate pipeline [flags] + +Flags: + -d, --config-dir string Dir path where the output config will be stored (default "resources") + --existing-pipeline-id string ID of the pipeline to generate config for + -f, --force Force overwrite existing files in the output directory + -h, --help help for pipeline + -s, --source-dir string Dir path where the downloaded files will be stored (default "src") + +Global Flags: + --debug enable debug logging + --key string resource key to use for the generated configuration + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-generate-pipeline/script b/acceptance/bundle/help/bundle-generate-pipeline/script new file mode 100644 index 000000000..c6af62d0a --- /dev/null +++ b/acceptance/bundle/help/bundle-generate-pipeline/script @@ -0,0 +1 @@ +trace $CLI bundle generate pipeline --help diff --git a/acceptance/bundle/help/bundle-generate/output.txt b/acceptance/bundle/help/bundle-generate/output.txt new file mode 100644 index 000000000..1d77dfdbd --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/output.txt @@ -0,0 +1,25 @@ + +>>> $CLI bundle generate --help +Generate bundle configuration + +Usage: + databricks bundle generate [command] + +Available Commands: + app Generate bundle configuration for a Databricks app + dashboard Generate configuration for a dashboard + job Generate bundle configuration for a job + pipeline Generate bundle configuration for a pipeline + +Flags: + -h, --help help for generate + --key string resource key to use for the generated configuration + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Use "databricks bundle generate [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle-generate/script b/acceptance/bundle/help/bundle-generate/script new file mode 100644 index 000000000..932588768 --- /dev/null +++ b/acceptance/bundle/help/bundle-generate/script @@ -0,0 +1 @@ +trace $CLI bundle generate --help diff --git a/acceptance/bundle/help/bundle-init/output.txt b/acceptance/bundle/help/bundle-init/output.txt new file mode 100644 index 000000000..bafe5a187 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/output.txt @@ -0,0 +1,31 @@ + +>>> $CLI bundle init --help +Initialize using a bundle template. + +TEMPLATE_PATH optionally specifies which template to use. It can be one of the following: +- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) +- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks) +- a local file system path with a template directory +- a Git repository URL, e.g. https://github.com/my/repository + +See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates. + +Usage: + databricks bundle init [TEMPLATE_PATH] [flags] + +Flags: + --branch string Git branch to use for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. + -h, --help help for init + --output-dir string Directory to write the initialized template to. + --tag string Git tag to use for template initialization + --template-dir string Directory path within a Git repository containing the template. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-init/script b/acceptance/bundle/help/bundle-init/script new file mode 100644 index 000000000..9bcce7559 --- /dev/null +++ b/acceptance/bundle/help/bundle-init/script @@ -0,0 +1 @@ +trace $CLI bundle init --help diff --git a/acceptance/bundle/help/bundle-open/output.txt b/acceptance/bundle/help/bundle-open/output.txt new file mode 100644 index 000000000..8b98aa850 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle open --help +Open a resource in the browser + +Usage: + databricks bundle open [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for open + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-open/script b/acceptance/bundle/help/bundle-open/script new file mode 100644 index 000000000..b4dfa2222 --- /dev/null +++ b/acceptance/bundle/help/bundle-open/script @@ -0,0 +1 @@ +trace $CLI bundle open --help diff --git a/acceptance/bundle/help/bundle-run/output.txt b/acceptance/bundle/help/bundle-run/output.txt new file mode 100644 index 000000000..17763a295 --- /dev/null +++ b/acceptance/bundle/help/bundle-run/output.txt @@ -0,0 +1,57 @@ + +>>> $CLI bundle run --help +Run the job or pipeline identified by KEY. + +The KEY is the unique identifier of the resource to run. In addition to +customizing the run using any of the available flags, you can also specify +keyword or positional arguments as shown in these examples: + + databricks bundle run my_job -- --key1 value1 --key2 value2 + +Or: + + databricks bundle run my_job -- value1 value2 value3 + +If the specified job uses job parameters or the job has a notebook task with +parameters, the first example applies and flag names are mapped to the +parameter names. + +If the specified job does not use job parameters and the job has a Python file +task or a Python wheel task, the second example applies. + +Usage: + databricks bundle run [flags] KEY + +Job Flags: + --params stringToString comma separated k=v pairs for job parameters (default []) + +Job Task Flags: + Note: please prefer use of job-level parameters (--param) over task-level parameters. + For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task + --dbt-commands strings A list of commands to execute for jobs with DBT tasks. + --jar-params strings A list of parameters for jobs with Spark JAR tasks. + --notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default []) + --pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default []) + --python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default []) + --python-params strings A list of parameters for jobs with Python tasks. + --spark-submit-params strings A list of parameters for jobs with Spark submit tasks. + --sql-params stringToString A map from keys to values for jobs with SQL tasks. (default []) + +Pipeline Flags: + --full-refresh strings List of tables to reset and recompute. + --full-refresh-all Perform a full graph reset and recompute. + --refresh strings List of tables to update. + --refresh-all Perform a full graph update. + --validate-only Perform an update to validate graph correctness. + +Flags: + -h, --help help for run + --no-wait Don't wait for the run to complete. + --restart Restart the run if it is already running. + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-run/script b/acceptance/bundle/help/bundle-run/script new file mode 100644 index 000000000..edcf1786a --- /dev/null +++ b/acceptance/bundle/help/bundle-run/script @@ -0,0 +1 @@ +trace $CLI bundle run --help diff --git a/acceptance/bundle/help/bundle-schema/output.txt b/acceptance/bundle/help/bundle-schema/output.txt new file mode 100644 index 000000000..8f2983f5b --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle schema --help +Generate JSON Schema for bundle configuration + +Usage: + databricks bundle schema [flags] + +Flags: + -h, --help help for schema + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-schema/script b/acceptance/bundle/help/bundle-schema/script new file mode 100644 index 000000000..a084fec8e --- /dev/null +++ b/acceptance/bundle/help/bundle-schema/script @@ -0,0 +1 @@ +trace $CLI bundle schema --help diff --git a/acceptance/bundle/help/bundle-summary/output.txt b/acceptance/bundle/help/bundle-summary/output.txt new file mode 100644 index 000000000..935c4bdc5 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/output.txt @@ -0,0 +1,17 @@ + +>>> $CLI bundle summary --help +Summarize resources deployed by this bundle + +Usage: + databricks bundle summary [flags] + +Flags: + --force-pull Skip local cache and load the state from the remote workspace + -h, --help help for summary + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-summary/script b/acceptance/bundle/help/bundle-summary/script new file mode 100644 index 000000000..967279d86 --- /dev/null +++ b/acceptance/bundle/help/bundle-summary/script @@ -0,0 +1 @@ +trace $CLI bundle summary --help diff --git a/acceptance/bundle/help/bundle-sync/output.txt b/acceptance/bundle/help/bundle-sync/output.txt new file mode 100644 index 000000000..6588e6978 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/output.txt @@ -0,0 +1,19 @@ + +>>> $CLI bundle sync --help +Synchronize bundle tree to the workspace + +Usage: + databricks bundle sync [flags] + +Flags: + --full perform full synchronization (default is incremental) + -h, --help help for sync + --interval duration file system polling interval (for --watch) (default 1s) + --output type type of the output format + --watch watch local file system for changes + +Global Flags: + --debug enable debug logging + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-sync/script b/acceptance/bundle/help/bundle-sync/script new file mode 100644 index 000000000..fe1d6c7e3 --- /dev/null +++ b/acceptance/bundle/help/bundle-sync/script @@ -0,0 +1 @@ +trace $CLI bundle sync --help diff --git a/acceptance/bundle/help/bundle-validate/output.txt b/acceptance/bundle/help/bundle-validate/output.txt new file mode 100644 index 000000000..a0c350faf --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/output.txt @@ -0,0 +1,16 @@ + +>>> $CLI bundle validate --help +Validate configuration + +Usage: + databricks bundle validate [flags] + +Flags: + -h, --help help for validate + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" diff --git a/acceptance/bundle/help/bundle-validate/script b/acceptance/bundle/help/bundle-validate/script new file mode 100644 index 000000000..8b8434b2d --- /dev/null +++ b/acceptance/bundle/help/bundle-validate/script @@ -0,0 +1 @@ +trace $CLI bundle validate --help diff --git a/acceptance/bundle/help/bundle/output.txt b/acceptance/bundle/help/bundle/output.txt new file mode 100644 index 000000000..e0e2ea47c --- /dev/null +++ b/acceptance/bundle/help/bundle/output.txt @@ -0,0 +1,33 @@ + +>>> $CLI bundle --help +Databricks Asset Bundles let you express data/AI/analytics projects as code. + +Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html + +Usage: + databricks bundle [command] + +Available Commands: + deploy Deploy bundle + deployment Deployment related commands + destroy Destroy deployed bundle resources + generate Generate bundle configuration + init Initialize using a bundle template + open Open a resource in the browser + run Run a job or pipeline update + schema Generate JSON Schema for bundle configuration + summary Summarize resources deployed by this bundle + sync Synchronize bundle tree to the workspace + validate Validate configuration + +Flags: + -h, --help help for bundle + --var strings set values for variables defined in bundle config. Example: --var="foo=bar" + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "databricks bundle [command] --help" for more information about a command. diff --git a/acceptance/bundle/help/bundle/script b/acceptance/bundle/help/bundle/script new file mode 100644 index 000000000..eac116817 --- /dev/null +++ b/acceptance/bundle/help/bundle/script @@ -0,0 +1 @@ +trace $CLI bundle --help diff --git a/acceptance/bundle/init/dbt-sql/script b/acceptance/bundle/init/dbt-sql/script deleted file mode 100644 index 5f6efda99..000000000 --- a/acceptance/bundle/init/dbt-sql/script +++ /dev/null @@ -1,5 +0,0 @@ -trace $CLI bundle init dbt-sql --config-file ./input.json - -cd my_dbt_sql -trace $CLI bundle validate -t dev -trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/init/dbt-sql/script.cleanup b/acceptance/bundle/init/dbt-sql/script.cleanup deleted file mode 100644 index b717c8d4a..000000000 --- a/acceptance/bundle/init/dbt-sql/script.cleanup +++ /dev/null @@ -1 +0,0 @@ -rm -fr my_dbt_sql diff --git a/acceptance/bundle/init/default-python/script.cleanup b/acceptance/bundle/init/default-python/script.cleanup deleted file mode 100644 index 4fd2e4aa7..000000000 --- a/acceptance/bundle/init/default-python/script.cleanup +++ /dev/null @@ -1 +0,0 @@ -rm -fr my_default_python diff --git a/acceptance/bundle/init/default-sql/script b/acceptance/bundle/init/default-sql/script deleted file mode 100644 index 6f7332a39..000000000 --- a/acceptance/bundle/init/default-sql/script +++ /dev/null @@ -1,5 +0,0 @@ -trace $CLI bundle init default-sql --config-file ./input.json - -cd my_default_sql -trace $CLI bundle validate -t dev -trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/init/default-sql/script.cleanup b/acceptance/bundle/init/default-sql/script.cleanup deleted file mode 100644 index c46d9ad60..000000000 --- a/acceptance/bundle/init/default-sql/script.cleanup +++ /dev/null @@ -1 +0,0 @@ -rm -fr my_default_sql diff --git a/bundle/tests/path_translation/fallback/README.md b/acceptance/bundle/paths/fallback/README.md similarity index 100% rename from bundle/tests/path_translation/fallback/README.md rename to acceptance/bundle/paths/fallback/README.md diff --git a/bundle/tests/path_translation/nominal/databricks.yml b/acceptance/bundle/paths/fallback/databricks.yml similarity index 80% rename from bundle/tests/path_translation/nominal/databricks.yml rename to acceptance/bundle/paths/fallback/databricks.yml index cd425920d..c6d0abe0a 100644 --- a/bundle/tests/path_translation/nominal/databricks.yml +++ b/acceptance/bundle/paths/fallback/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_nominal + name: fallback include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/fallback/output.job.json b/acceptance/bundle/paths/fallback/output.job.json new file mode 100644 index 000000000..fe9e1cf3d --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.job.json @@ -0,0 +1,67 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + } +] diff --git a/acceptance/bundle/paths/fallback/output.pipeline.json b/acceptance/bundle/paths/fallback/output.pipeline.json new file mode 100644 index 000000000..38521cb22 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/fallback/output.txt b/acceptance/bundle/paths/fallback/output.txt new file mode 100644 index 000000000..f694610d2 --- /dev/null +++ b/acceptance/bundle/paths/fallback/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle validate -t development -o json + +Exit code: 0 + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: fallback +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/fallback/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/fallback/override_job.yml b/acceptance/bundle/paths/fallback/override_job.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_job.yml rename to acceptance/bundle/paths/fallback/override_job.yml diff --git a/bundle/tests/path_translation/fallback/override_pipeline.yml b/acceptance/bundle/paths/fallback/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/override_pipeline.yml rename to acceptance/bundle/paths/fallback/override_pipeline.yml diff --git a/bundle/tests/path_translation/fallback/resources/my_job.yml b/acceptance/bundle/paths/fallback/resources/my_job.yml similarity index 71% rename from bundle/tests/path_translation/fallback/resources/my_job.yml rename to acceptance/bundle/paths/fallback/resources/my_job.yml index 4907df4f0..921ee412b 100644 --- a/bundle/tests/path_translation/fallback/resources/my_job.yml +++ b/acceptance/bundle/paths/fallback/resources/my_job.yml @@ -4,33 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/fallback/resources/my_pipeline.yml b/acceptance/bundle/paths/fallback/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/fallback/resources/my_pipeline.yml rename to acceptance/bundle/paths/fallback/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/fallback/script b/acceptance/bundle/paths/fallback/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/fallback/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/fallback/script.cleanup b/acceptance/bundle/paths/fallback/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/fallback/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep b/acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/fallback/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/fallback/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/fallback/src/file.py b/acceptance/bundle/paths/fallback/src/file.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file.py rename to acceptance/bundle/paths/fallback/src/file.py diff --git a/bundle/tests/path_translation/fallback/src/file1.py b/acceptance/bundle/paths/fallback/src/file1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file1.py rename to acceptance/bundle/paths/fallback/src/file1.py diff --git a/bundle/tests/path_translation/fallback/src/file2.py b/acceptance/bundle/paths/fallback/src/file2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/file2.py rename to acceptance/bundle/paths/fallback/src/file2.py diff --git a/bundle/tests/path_translation/fallback/src/notebook.py b/acceptance/bundle/paths/fallback/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook.py rename to acceptance/bundle/paths/fallback/src/notebook.py diff --git a/bundle/tests/path_translation/fallback/src/notebook1.py b/acceptance/bundle/paths/fallback/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook1.py rename to acceptance/bundle/paths/fallback/src/notebook1.py diff --git a/bundle/tests/path_translation/fallback/src/notebook2.py b/acceptance/bundle/paths/fallback/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/fallback/src/notebook2.py rename to acceptance/bundle/paths/fallback/src/notebook2.py diff --git a/bundle/tests/path_translation/fallback/src/sql.sql b/acceptance/bundle/paths/fallback/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/fallback/src/sql.sql rename to acceptance/bundle/paths/fallback/src/sql.sql diff --git a/bundle/tests/path_translation/nominal/README.md b/acceptance/bundle/paths/nominal/README.md similarity index 100% rename from bundle/tests/path_translation/nominal/README.md rename to acceptance/bundle/paths/nominal/README.md diff --git a/bundle/tests/path_translation/fallback/databricks.yml b/acceptance/bundle/paths/nominal/databricks.yml similarity index 79% rename from bundle/tests/path_translation/fallback/databricks.yml rename to acceptance/bundle/paths/nominal/databricks.yml index 92be3f921..5d3c22f91 100644 --- a/bundle/tests/path_translation/fallback/databricks.yml +++ b/acceptance/bundle/paths/nominal/databricks.yml @@ -1,5 +1,5 @@ bundle: - name: path_translation_fallback + name: nominal include: - "resources/*.yml" diff --git a/acceptance/bundle/paths/nominal/output.job.json b/acceptance/bundle/paths/nominal/output.job.json new file mode 100644 index 000000000..9e1cb4d90 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.job.json @@ -0,0 +1,89 @@ +[ + { + "job_cluster_key": "default", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + }, + "task_key": "notebook_example" + }, + { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + }, + "task_key": "spark_python_example" + }, + { + "dbt_task": { + "commands": [ + "dbt run", + "dbt run" + ], + "project_directory": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/dbt_project" + }, + "job_cluster_key": "default", + "task_key": "dbt_example" + }, + { + "job_cluster_key": "default", + "sql_task": { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/sql.sql" + }, + "warehouse_id": "cafef00d" + }, + "task_key": "sql_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "whl": "dist/wheel1.whl" + }, + { + "whl": "dist/wheel2.whl" + } + ], + "python_wheel_task": { + "package_name": "my_package" + }, + "task_key": "python_wheel_example" + }, + { + "job_cluster_key": "default", + "libraries": [ + { + "jar": "target/jar1.jar" + }, + { + "jar": "target/jar2.jar" + } + ], + "spark_jar_task": { + "main_class_name": "com.example.Main" + }, + "task_key": "spark_jar_example" + }, + { + "for_each_task": { + "task": { + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook" + } + } + }, + "job_cluster_key": "default", + "task_key": "for_each_notebook_example" + }, + { + "for_each_task": { + "task": { + "job_cluster_key": "default", + "spark_python_task": { + "python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py" + } + } + }, + "task_key": "for_each_spark_python_example" + } +] diff --git a/acceptance/bundle/paths/nominal/output.pipeline.json b/acceptance/bundle/paths/nominal/output.pipeline.json new file mode 100644 index 000000000..277b0c4a1 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.pipeline.json @@ -0,0 +1,22 @@ +[ + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file1.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook1" + } + }, + { + "file": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file2.py" + } + }, + { + "notebook": { + "path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook2" + } + } +] diff --git a/acceptance/bundle/paths/nominal/output.txt b/acceptance/bundle/paths/nominal/output.txt new file mode 100644 index 000000000..189170335 --- /dev/null +++ b/acceptance/bundle/paths/nominal/output.txt @@ -0,0 +1,18 @@ + +>>> $CLI bundle validate -t development -o json + +Exit code: 0 + +>>> $CLI bundle validate -t error +Error: notebook this value is overridden not found. Local notebook references are expected +to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb] + +Name: nominal +Target: error +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/nominal/error + +Found 1 error + +Exit code: 1 diff --git a/bundle/tests/path_translation/nominal/override_job.yml b/acceptance/bundle/paths/nominal/override_job.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_job.yml rename to acceptance/bundle/paths/nominal/override_job.yml diff --git a/bundle/tests/path_translation/nominal/override_pipeline.yml b/acceptance/bundle/paths/nominal/override_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/override_pipeline.yml rename to acceptance/bundle/paths/nominal/override_pipeline.yml diff --git a/bundle/tests/path_translation/nominal/resources/my_job.yml b/acceptance/bundle/paths/nominal/resources/my_job.yml similarity index 74% rename from bundle/tests/path_translation/nominal/resources/my_job.yml rename to acceptance/bundle/paths/nominal/resources/my_job.yml index 2020c9dc8..13996a20c 100644 --- a/bundle/tests/path_translation/nominal/resources/my_job.yml +++ b/acceptance/bundle/paths/nominal/resources/my_job.yml @@ -4,38 +4,45 @@ resources: name: "placeholder" tasks: - task_key: notebook_example + job_cluster_key: default notebook_task: notebook_path: "this value is overridden" - task_key: spark_python_example + job_cluster_key: default spark_python_task: python_file: "this value is overridden" - task_key: dbt_example + job_cluster_key: default dbt_task: project_directory: "this value is overridden" commands: - "dbt run" - task_key: sql_example + job_cluster_key: default sql_task: file: path: "this value is overridden" warehouse_id: cafef00d - task_key: python_wheel_example + job_cluster_key: default python_wheel_task: package_name: my_package libraries: - whl: ../dist/wheel1.whl - task_key: spark_jar_example + job_cluster_key: default spark_jar_task: main_class_name: com.example.Main libraries: - jar: ../target/jar1.jar - task_key: for_each_notebook_example + job_cluster_key: default for_each_task: task: notebook_task: @@ -44,5 +51,12 @@ resources: - task_key: for_each_spark_python_example for_each_task: task: + job_cluster_key: default spark_python_task: python_file: "this value is overridden" + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/bundle/tests/path_translation/nominal/resources/my_pipeline.yml b/acceptance/bundle/paths/nominal/resources/my_pipeline.yml similarity index 100% rename from bundle/tests/path_translation/nominal/resources/my_pipeline.yml rename to acceptance/bundle/paths/nominal/resources/my_pipeline.yml diff --git a/acceptance/bundle/paths/nominal/script b/acceptance/bundle/paths/nominal/script new file mode 100644 index 000000000..29aa420c5 --- /dev/null +++ b/acceptance/bundle/paths/nominal/script @@ -0,0 +1,10 @@ +errcode trace $CLI bundle validate -t development -o json > output.tmp.json + +# Capture job tasks +jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json + +# Capture pipeline libraries +jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json + +# Expect failure for the "error" target +errcode trace $CLI bundle validate -t error diff --git a/acceptance/bundle/paths/nominal/script.cleanup b/acceptance/bundle/paths/nominal/script.cleanup new file mode 100644 index 000000000..f93425dff --- /dev/null +++ b/acceptance/bundle/paths/nominal/script.cleanup @@ -0,0 +1 @@ +rm -f output.tmp.json diff --git a/bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep b/acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep similarity index 100% rename from bundle/tests/path_translation/nominal/src/dbt_project/.gitkeep rename to acceptance/bundle/paths/nominal/src/dbt_project/.gitkeep diff --git a/bundle/tests/path_translation/nominal/src/file.py b/acceptance/bundle/paths/nominal/src/file.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file.py rename to acceptance/bundle/paths/nominal/src/file.py diff --git a/bundle/tests/path_translation/nominal/src/file1.py b/acceptance/bundle/paths/nominal/src/file1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file1.py rename to acceptance/bundle/paths/nominal/src/file1.py diff --git a/bundle/tests/path_translation/nominal/src/file2.py b/acceptance/bundle/paths/nominal/src/file2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/file2.py rename to acceptance/bundle/paths/nominal/src/file2.py diff --git a/bundle/tests/path_translation/nominal/src/notebook.py b/acceptance/bundle/paths/nominal/src/notebook.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook.py rename to acceptance/bundle/paths/nominal/src/notebook.py diff --git a/bundle/tests/path_translation/nominal/src/notebook1.py b/acceptance/bundle/paths/nominal/src/notebook1.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook1.py rename to acceptance/bundle/paths/nominal/src/notebook1.py diff --git a/bundle/tests/path_translation/nominal/src/notebook2.py b/acceptance/bundle/paths/nominal/src/notebook2.py similarity index 100% rename from bundle/tests/path_translation/nominal/src/notebook2.py rename to acceptance/bundle/paths/nominal/src/notebook2.py diff --git a/bundle/tests/path_translation/nominal/src/sql.sql b/acceptance/bundle/paths/nominal/src/sql.sql similarity index 100% rename from bundle/tests/path_translation/nominal/src/sql.sql rename to acceptance/bundle/paths/nominal/src/sql.sql diff --git a/bundle/tests/relative_path_translation/databricks.yml b/acceptance/bundle/paths/relative_path_translation/databricks.yml similarity index 100% rename from bundle/tests/relative_path_translation/databricks.yml rename to acceptance/bundle/paths/relative_path_translation/databricks.yml diff --git a/acceptance/bundle/paths/relative_path_translation/output.default.json b/acceptance/bundle/paths/relative_path_translation/output.default.json new file mode 100644 index 000000000..e2514b392 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.default.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file1.py", + "/Workspace/remote/src/file1.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.override.json b/acceptance/bundle/paths/relative_path_translation/output.override.json new file mode 100644 index 000000000..729d2eaa0 --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.override.json @@ -0,0 +1,6 @@ +{ + "paths": [ + "/Workspace/remote/src/file2.py", + "/Workspace/remote/src/file2.py" + ] +} diff --git a/acceptance/bundle/paths/relative_path_translation/output.txt b/acceptance/bundle/paths/relative_path_translation/output.txt new file mode 100644 index 000000000..362f2ec7b --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/output.txt @@ -0,0 +1,4 @@ + +>>> $CLI bundle validate -t default -o json + +>>> $CLI bundle validate -t override -o json diff --git a/bundle/tests/relative_path_translation/resources/job.yml b/acceptance/bundle/paths/relative_path_translation/resources/job.yml similarity index 66% rename from bundle/tests/relative_path_translation/resources/job.yml rename to acceptance/bundle/paths/relative_path_translation/resources/job.yml index 93f121f25..9540ff1ad 100644 --- a/bundle/tests/relative_path_translation/resources/job.yml +++ b/acceptance/bundle/paths/relative_path_translation/resources/job.yml @@ -3,12 +3,20 @@ resources: job: tasks: - task_key: local + job_cluster_key: default spark_python_task: python_file: ../src/file1.py - task_key: variable_reference + job_cluster_key: default spark_python_task: # Note: this is a pure variable reference yet needs to persist the location # of the reference, not the location of the variable value. # Also see https://github.com/databricks/cli/issues/1330. python_file: ${var.file_path} + + # Include a job cluster for completeness + job_clusters: + - job_cluster_key: default + new_cluster: + spark_version: 15.4.x-scala2.12 diff --git a/acceptance/bundle/paths/relative_path_translation/script b/acceptance/bundle/paths/relative_path_translation/script new file mode 100644 index 000000000..252e9a07f --- /dev/null +++ b/acceptance/bundle/paths/relative_path_translation/script @@ -0,0 +1,4 @@ +trace $CLI bundle validate -t default -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json +trace $CLI bundle validate -t override -o json | \ + jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json diff --git a/bundle/tests/relative_path_translation/src/file1.py b/acceptance/bundle/paths/relative_path_translation/src/file1.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file1.py rename to acceptance/bundle/paths/relative_path_translation/src/file1.py diff --git a/bundle/tests/relative_path_translation/src/file2.py b/acceptance/bundle/paths/relative_path_translation/src/file2.py similarity index 100% rename from bundle/tests/relative_path_translation/src/file2.py rename to acceptance/bundle/paths/relative_path_translation/src/file2.py diff --git a/acceptance/bundle/init/dbt-sql/input.json b/acceptance/bundle/templates/dbt-sql/input.json similarity index 100% rename from acceptance/bundle/init/dbt-sql/input.json rename to acceptance/bundle/templates/dbt-sql/input.json diff --git a/acceptance/bundle/init/dbt-sql/output.txt b/acceptance/bundle/templates/dbt-sql/output.txt similarity index 92% rename from acceptance/bundle/init/dbt-sql/output.txt rename to acceptance/bundle/templates/dbt-sql/output.txt index 4f6f83693..972c7e152 100644 --- a/acceptance/bundle/init/dbt-sql/output.txt +++ b/acceptance/bundle/templates/dbt-sql/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle init dbt-sql --config-file ./input.json +>>> $CLI bundle init dbt-sql --config-file ./input.json --output-dir output Welcome to the dbt template for Databricks Asset Bundles! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json new file mode 100644 index 000000000..28fe943fd --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "redhat.vscode-yaml", + "innoverio.vscode-dbt-power-user", + ] +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json new file mode 100644 index 000000000..e8dcd1a83 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/.vscode/settings.json @@ -0,0 +1,32 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "python.envFile": "${workspaceFolder}/.databricks/.databricks.env", + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python", + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "hive_metastore", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", + "[jinja-sql]": { + "editor.defaultFormatter": "innoverio.vscode-dbt-power-user" + } +} diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md new file mode 100644 index 000000000..756a2eda4 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md @@ -0,0 +1,138 @@ +# my_dbt_sql + +The 'my_dbt_sql' project was generated by using the dbt template for +Databricks Asset Bundles. It follows the standard dbt project structure +and has an additional `resources` directory to define Databricks resources such as jobs +that run dbt models. + +* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects. +* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html + +The remainder of this file includes instructions for local development (using dbt) +and deployment to production (using Databricks Asset Bundles). + +## Development setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. Install dbt + + To install dbt, you need a recent version of Python. For the instructions below, + we assume `python3` refers to the Python version you want to use. On some systems, + you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`. + + Run these instructions from the `my_dbt_sql` directory. We recommend making + use of a Python virtual environment and installing dbt as follows: + + ``` + $ python3 -m venv .venv + $ . .venv/bin/activate + $ pip install -r requirements-dev.txt + ``` + +4. Initialize your dbt profile + + Use `dbt init` to initialize your profile. + + ``` + $ dbt init + ``` + + Note that dbt authentication uses personal access tokens by default + (see https://docs.databricks.com/dev-tools/auth/pat.html). + You can use OAuth as an alternative, but this currently requires manual configuration. + See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md + for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605 + for advice on setting up OAuth for Azure Databricks. + + To setup up additional profiles, such as a 'prod' profile, + see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles. + +5. Activate dbt so it can be used from the terminal + + ``` + $ . .venv/bin/activate + ``` + +## Local development with dbt + +Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html): + +``` +$ dbt seed +$ dbt run +``` + +(Did you get an error that the dbt command could not be found? You may need +to try the last step from the development setup above to re-activate +your Python virtual environment!) + + +To just evaluate a single model defined in a file called orders.sql, use: + +``` +$ dbt run --model orders +``` + +Use `dbt test` to run tests generated from yml files such as `models/schema.yml` +and any SQL tests from `tests/` + +``` +$ dbt test +``` + +## Production setup + +Your production dbt profiles are defined in dbt_profiles/profiles.yml. +These profiles define the default catalog, schema, and any other +target-specific settings. Read more about dbt profiles on Databricks at +https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile. + +The target workspaces for staging and prod are defined in databricks.yml. +You can manually deploy based on these configurations (see below). +Or you can use CI/CD to automate deployment. See +https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation +on CI/CD setup. + +## Manually deploying to Databricks with Databricks Asset Bundles + +Databricks Asset Bundles can be used to deploy to Databricks and to execute +dbt commands as a job using Databricks Workflows. See +https://docs.databricks.com/dev-tools/bundles/index.html to learn more. + +Use the Databricks CLI to deploy a development copy of this project to a workspace: + +``` +$ databricks bundle deploy --target dev +``` + +(Note that "dev" is the default target, so the `--target` parameter +is optional here.) + +This deploys everything that's defined for this project. +For example, the default template would deploy a job called +`[dev yourname] my_dbt_sql_job` to your workspace. +You can find that job by opening your workpace and clicking on **Workflows**. + +You can also deploy to your production target directly from the command-line. +The warehouse, catalog, and schema for that target are configured in databricks.yml. +When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml +has a schedule set that runs every day. The schedule is paused when deploying in development mode +(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +To deploy a production copy, type: + +``` +$ databricks bundle deploy --target prod +``` + +## IDE support + +Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions +related to dbt may further enhance your dbt development experience! diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml new file mode 100644 index 000000000..1962bc543 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -0,0 +1,34 @@ +# This file defines the structure of this project and how it is deployed +# to production using Databricks Asset Bundles. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_dbt_sql + uuid: + +include: + - resources/*.yml + +# Deployment targets. +# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml +targets: + dev: + default: true + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml new file mode 100644 index 000000000..fdaf30dda --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_profiles/profiles.yml @@ -0,0 +1,38 @@ + +# This file defines dbt profiles for deployed dbt jobs. +my_dbt_sql: + target: dev # default target + outputs: + + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: + type: databricks + method: http + catalog: main + schema: "{{ var('dev_schema') }}" + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'dev' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" + + # The production target when deployed with the Databricks CLI + prod: + type: databricks + method: http + catalog: main + schema: default + + http_path: /sql/2.0/warehouses/f00dcafe + + # The workspace host / token are provided by Databricks + # see databricks.yml for the workspace host used for 'prod' + host: "{{ env_var('DBT_HOST') }}" + token: "{{ env_var('DBT_ACCESS_TOKEN') }}" diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml new file mode 100644 index 000000000..4218640d8 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/dbt_project.yml @@ -0,0 +1,32 @@ +name: 'my_dbt_sql' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'my_dbt_sql' + +# These configurations specify where dbt should look for different types of files. +# For Databricks asset bundles, we put everything in src, as you may have +# non-dbt resources in your project. +model-paths: ["src/models"] +analysis-paths: ["src/analyses"] +test-paths: ["src/tests"] +seed-paths: ["src/seeds"] +macro-paths: ["src/macros"] +snapshot-paths: ["src/snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views by default. These settings can be overridden in the +# individual model files using the `{{ config(...) }}` macro. +models: + my_dbt_sql: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml new file mode 100644 index 000000000..5e0f0fc29 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/profile_template.yml @@ -0,0 +1,23 @@ +# This file defines prompts with defaults for dbt initializaton. +# It is used when the `dbt init` command is invoked. +# +fixed: + type: databricks +prompts: + host: + default: $DATABRICKS_HOST + token: + hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' + hide_input: true + http_path: + hint: 'HTTP path of SQL warehouse to use' + default: /sql/2.0/warehouses/f00dcafe + catalog: + hint: 'initial catalog' + default: main + schema: + hint: 'personal schema where dbt will build objects during development, example: $USERNAME' + threads: + hint: 'threads to use during development, 1 or more' + type: 'int' + default: 4 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt new file mode 100644 index 000000000..e6b861203 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/requirements-dev.txt @@ -0,0 +1,3 @@ +## requirements-dev.txt: dependencies for local development. + +dbt-databricks>=1.8.0,<2.0.0 diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml new file mode 100644 index 000000000..d52f8ed50 --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/resources/my_dbt_sql.job.yml @@ -0,0 +1,43 @@ +resources: + jobs: + my_dbt_sql_job: + name: my_dbt_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + + tasks: + - task_key: dbt + + dbt_task: + project_directory: ../ + # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml + profiles_directory: dbt_profiles/ + commands: + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + + libraries: + - pypi: + package: dbt-databricks>=1.8.0,<2.0.0 + + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql new file mode 100644 index 000000000..e32736ceb --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_daily.sql @@ -0,0 +1,17 @@ + +-- This model file defines a materialized view called 'orders_daily' +-- +-- Read more about materialized at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'materialized_view') }} + +select order_date, count(*) AS number_of_orders + +from {{ ref('orders_raw') }} + +-- During development, only process a smaller range of data +{% if target.name != 'prod' %} +where order_date >= '2019-08-01' and order_date < '2019-09-01' +{% endif %} + +group by order_date diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql new file mode 100644 index 000000000..8faf8f38b --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/orders_raw.sql @@ -0,0 +1,16 @@ +-- This model file defines a streaming table called 'orders_raw' +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables +-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561. +{{ config(materialized = 'streaming_table') }} + +select + customer_name, + date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date, + order_number +from stream read_files( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml new file mode 100644 index 000000000..c64f1bfce --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: orders_raw + description: "Raw ingested orders" + columns: + - name: customer_name + description: "The name of a customer" + data_tests: + - unique + - not_null + + - name: orders_daily + description: "Number of orders by day" + columns: + - name: order_date + description: "The date on which orders took place" + data_tests: + - unique + - not_null diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/src/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script new file mode 100644 index 000000000..c4ca817fe --- /dev/null +++ b/acceptance/bundle/templates/dbt-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init dbt-sql --config-file ./input.json --output-dir output + +cd output/my_dbt_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/init/default-python/input.json b/acceptance/bundle/templates/default-python/input.json similarity index 100% rename from acceptance/bundle/init/default-python/input.json rename to acceptance/bundle/templates/default-python/input.json diff --git a/acceptance/bundle/init/default-python/output.txt b/acceptance/bundle/templates/default-python/output.txt similarity index 90% rename from acceptance/bundle/init/default-python/output.txt rename to acceptance/bundle/templates/default-python/output.txt index a02a7d41b..5493ac2cf 100644 --- a/acceptance/bundle/init/default-python/output.txt +++ b/acceptance/bundle/templates/default-python/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle init default-python --config-file ./input.json +>>> $CLI bundle init default-python --config-file ./input.json --output-dir output Welcome to the default Python template for Databricks Asset Bundles! Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): $DATABRICKS_URL diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi new file mode 100644 index 000000000..0edd5181b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json new file mode 100644 index 000000000..5d15eba36 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json new file mode 100644 index 000000000..8ee87c30d --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/README.md new file mode 100644 index 000000000..97d7d7949 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/README.md @@ -0,0 +1,47 @@ +# my_default_python + +The 'my_default_python' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_python_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_default_python.job.yml). The schedule + is paused when deploying in development mode (see + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). + +5. To run a job or pipeline, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +7. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml new file mode 100644 index 000000000..9deca9cf5 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml @@ -0,0 +1,31 @@ +# This is a Databricks asset bundle definition for my_default_python. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_python + uuid: + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini new file mode 100644 index 000000000..80432c220 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt new file mode 100644 index 000000000..0ffbf6aed --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/requirements-dev.txt @@ -0,0 +1,29 @@ +## requirements-dev.txt: dependencies for local development. +## +## For defining dependencies used by jobs in Databricks Workflows, see +## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + +## Add code completion support for DLT +databricks-dlt + +## pytest is the default package used for testing +pytest + +## Dependencies for building wheel files +setuptools +wheel + +## databricks-connect can be used to run parts of this project locally. +## See https://docs.databricks.com/dev-tools/databricks-connect.html. +## +## databricks-connect is automatically installed if you're using Databricks +## extension for Visual Studio Code +## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html). +## +## To manually install databricks-connect, either follow the instructions +## at https://docs.databricks.com/dev-tools/databricks-connect.html +## to install the package system-wide. Or uncomment the line below to install a +## version of db-connect that corresponds to the Databricks Runtime version used +## for this project. +# +# databricks-connect>=15.4,<15.5 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml new file mode 100644 index 000000000..e6148a4ad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.job.yml @@ -0,0 +1,49 @@ +# The main job for my_default_python. +resources: + jobs: + my_default_python_job: + name: my_default_python_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: refresh_pipeline + depends_on: + - task_key: notebook_task + pipeline_task: + pipeline_id: ${resources.pipelines.my_default_python_pipeline.id} + + - task_key: main_task + depends_on: + - task_key: refresh_pipeline + job_cluster_key: job_cluster + python_wheel_task: + package_name: my_default_python + entry_point: main + libraries: + # By default we just include the .whl file generated for the my_default_python package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml new file mode 100644 index 000000000..f9e083f4f --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/resources/my_default_python.pipeline.yml @@ -0,0 +1,13 @@ +# The main pipeline for my_default_python +resources: + pipelines: + my_default_python_pipeline: + name: my_default_python_pipeline + catalog: main + target: my_default_python_${bundle.target} + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + bundle.sourcePath: ${workspace.file_path}/src diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb new file mode 100644 index 000000000..3b2fef4b4 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/scratch/exploration.ipynb @@ -0,0 +1,61 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"../src\")\n", + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/setup.py b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py new file mode 100644 index 000000000..84b24ecb8 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/setup.py @@ -0,0 +1,41 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_default_python project. +""" + +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import datetime +import my_default_python + +local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S") + +setup( + name="my_default_python", + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + version=my_default_python.__version__ + "+" + local_version, + url="https://databricks.com", + author="$USERNAME", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb new file mode 100644 index 000000000..36e993af7 --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/dlt_pipeline.ipynb @@ -0,0 +1,90 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Import DLT and src/my_default_python\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from my_default_python import main" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/my_default_python/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb new file mode 100644 index 000000000..0d560443b --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_default_python import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py new file mode 100644 index 000000000..dc449154a --- /dev/null +++ b/acceptance/bundle/templates/default-python/output/my_default_python/tests/main_test.py @@ -0,0 +1,6 @@ +from my_default_python.main import get_taxis, get_spark + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/init/default-python/script b/acceptance/bundle/templates/default-python/script similarity index 73% rename from acceptance/bundle/init/default-python/script rename to acceptance/bundle/templates/default-python/script index 84834ce3f..b11a7ea21 100644 --- a/acceptance/bundle/init/default-python/script +++ b/acceptance/bundle/templates/default-python/script @@ -1,5 +1,5 @@ -trace $CLI bundle init default-python --config-file ./input.json +trace $CLI bundle init default-python --config-file ./input.json --output-dir output -cd my_default_python +cd output/my_default_python trace $CLI bundle validate -t dev trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/default-sql/.ruff.toml b/acceptance/bundle/templates/default-sql/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/acceptance/bundle/init/default-sql/input.json b/acceptance/bundle/templates/default-sql/input.json similarity index 100% rename from acceptance/bundle/init/default-sql/input.json rename to acceptance/bundle/templates/default-sql/input.json diff --git a/acceptance/bundle/init/default-sql/output.txt b/acceptance/bundle/templates/default-sql/output.txt similarity index 91% rename from acceptance/bundle/init/default-sql/output.txt rename to acceptance/bundle/templates/default-sql/output.txt index ff107eae0..fe0139093 100644 --- a/acceptance/bundle/init/default-sql/output.txt +++ b/acceptance/bundle/templates/default-sql/output.txt @@ -1,5 +1,5 @@ ->>> $CLI bundle init default-sql --config-file ./input.json +>>> $CLI bundle init default-sql --config-file ./input.json --output-dir output Welcome to the default SQL template for Databricks Asset Bundles! diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore new file mode 100644 index 000000000..de811f118 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.gitignore @@ -0,0 +1,2 @@ + +.databricks diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json new file mode 100644 index 000000000..8e1023465 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "redhat.vscode-yaml", + "databricks.sqltools-databricks-driver", + ] +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json new file mode 100644 index 000000000..c641abe39 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.vscode/settings.json @@ -0,0 +1,27 @@ +{ + "python.analysis.stubPath": ".vscode", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["src"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "sqltools.connections": [ + { + "connectionMethod": "VS Code Extension (beta)", + "catalog": "main", + "previewLimit": 50, + "driver": "Databricks", + "name": "databricks", + "path": "/sql/2.0/warehouses/f00dcafe" + } + ], + "sqltools.autoConnectTo": "", +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md new file mode 100644 index 000000000..67ded153f --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md @@ -0,0 +1,41 @@ +# my_default_sql + +The 'my_default_sql' project was generated by using the default-sql template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html + +2. Authenticate to your Databricks workspace (if you have not done so already): + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_default_sql_job` to your workspace. + You can find that job by opening your workpace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. To run a job, use the "run" command: + ``` + $ databricks bundle run + ``` + +6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + +7. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml new file mode 100644 index 000000000..ab857287e --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_default_sql. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_default_sql + uuid: + +include: + - resources/*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + warehouse_id: + description: The warehouse to use + catalog: + description: The catalog to use + schema: + description: The schema to use + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + variables: + warehouse_id: f00dcafe + catalog: main + schema: ${workspace.current_user.short_name} + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + variables: + warehouse_id: f00dcafe + catalog: main + schema: default + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml new file mode 100644 index 000000000..86de0f9db --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/resources/my_default_sql_sql.job.yml @@ -0,0 +1,38 @@ +# A job running SQL queries on a SQL warehouse +resources: + jobs: + my_default_sql_sql_job: + name: my_default_sql_sql_job + + trigger: + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + periodic: + interval: 1 + unit: DAYS + + email_notifications: + on_failure: + - $USERNAME + + parameters: + - name: catalog + default: ${var.catalog} + - name: schema + default: ${var.schema} + - name: bundle_target + default: ${bundle.target} + + tasks: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_raw.sql + + - task_key: orders_daily + depends_on: + - task_key: orders_raw + sql_task: + warehouse_id: ${var.warehouse_id} + file: + path: ../src/orders_daily.sql diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md new file mode 100644 index 000000000..5350d09cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks and SQL files. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb new file mode 100644 index 000000000..c3fd072e5 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/scratch/exploration.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "%sql\n", + "SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "exploration", + "widgets": {} + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql new file mode 100644 index 000000000..ea7b80b54 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_daily.sql @@ -0,0 +1,21 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REPLACE MATERIALIZED VIEW + orders_daily +AS SELECT + order_date, count(*) AS number_of_orders +FROM + orders_raw + +WHERE if( + {{bundle_target}} = "prod", + true, + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01' +) + +GROUP BY order_date diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql new file mode 100644 index 000000000..79b1354cf --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/src/orders_raw.sql @@ -0,0 +1,19 @@ +-- This query is executed using Databricks Workflows (see resources/my_default_sql_sql.job.yml) +-- +-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ +-- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html + +USE CATALOG {{catalog}}; +USE IDENTIFIER({{schema}}); + +CREATE OR REFRESH STREAMING TABLE + orders_raw +AS SELECT + customer_name, + DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date, + order_number +FROM STREAM READ_FILES( + "/databricks-datasets/retail-org/sales_orders/", + format => "json", + header => true +) diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script new file mode 100644 index 000000000..66e7a14a2 --- /dev/null +++ b/acceptance/bundle/templates/default-sql/script @@ -0,0 +1,5 @@ +trace $CLI bundle init default-sql --config-file ./input.json --output-dir output + +cd output/my_default_sql +trace $CLI bundle validate -t dev +trace $CLI bundle validate -t prod diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/input.json b/acceptance/bundle/templates/experimental-jobs-as-code/input.json new file mode 100644 index 000000000..748076c75 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/input.json @@ -0,0 +1,5 @@ +{ + "project_name": "my_jobs_as_code", + "include_notebook": "yes", + "include_python": "yes" +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output.txt b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt new file mode 100644 index 000000000..1aa8a94d5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output.txt @@ -0,0 +1,85 @@ + +>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! +Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL + +✨ Your new project has been created in the 'my_jobs_as_code' directory! + +Please refer to the README.md file for "getting started" instructions. +See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. + +>>> $CLI bundle validate -t dev --output json +{ + "jobs": { + "my_jobs_as_code_job": { + "deployment": { + "kind": "BUNDLE", + "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json" + }, + "edit_mode": "UI_LOCKED", + "email_notifications": { + "on_failure": [ + "$USERNAME" + ] + }, + "format": "MULTI_TASK", + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "autoscale": { + "max_workers": 4, + "min_workers": 1 + }, + "node_type_id": "i3.xlarge", + "spark_version": "15.4.x-scala2.12" + } + } + ], + "max_concurrent_runs": 4, + "name": "[dev $USERNAME] my_jobs_as_code_job", + "permissions": [], + "queue": { + "enabled": true + }, + "tags": { + "dev": "$USERNAME" + }, + "tasks": [ + { + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "depends_on": [ + { + "task_key": "notebook_task" + } + ], + "job_cluster_key": "job_cluster", + "libraries": [ + { + "whl": "dist/*.whl" + } + ], + "python_wheel_task": { + "entry_point": "main", + "package_name": "my_jobs_as_code" + }, + "task_key": "main_task" + } + ], + "trigger": { + "pause_status": "PAUSED", + "periodic": { + "interval": 1, + "unit": "DAYS" + } + } + } + } +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md new file mode 100644 index 000000000..8c429c6e5 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md @@ -0,0 +1,58 @@ +# my_jobs_as_code + +The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] my_jobs_as_code_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/my_jobs_as_code_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml new file mode 100644 index 000000000..fd87aa381 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks asset bundle definition for my_jobs_as_code. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: my_jobs_as_code + uuid: + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: $DATABRICKS_URL + + prod: + mode: production + workspace: + host: $DATABRICKS_URL + # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. + root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: $USERNAME + level: CAN_MANAGE + run_as: + user_name: $USERNAME diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep new file mode 100644 index 000000000..fa25d2745 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep @@ -0,0 +1,22 @@ +# Fixtures + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml new file mode 100644 index 000000000..28240e3ec --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "my_jobs_as_code" +requires-python = ">=3.10" +description = "wheel file based on my_jobs_as_code" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +[project.entry-points.packages] +main = "my_jobs_as_code.main:main" + +[tool.setuptools] +py-modules = ["resources", "my_jobs_as_code"] + +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect>=15.4,<15.5", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py new file mode 100644 index 000000000..4854d656f --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/my_jobs_as_code_job.py @@ -0,0 +1,67 @@ +from databricks.bundles.jobs import Job + +""" +The main job for my_jobs_as_code. +""" + + +my_jobs_as_code_job = Job.from_dict( + { + "name": "my_jobs_as_code_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + "email_notifications": { + "on_failure": [ + "$USERNAME", + ], + }, + "tasks": [ + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + { + "task_key": "main_task", + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "my_jobs_as_code", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the my_jobs_as_code package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "15.4.x-scala2.12", + "node_type_id": "i3.xlarge", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py new file mode 100644 index 000000000..ba284ba82 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/setup.py @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the my_jobs_as_code project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/my_jobs_as_code/main.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb new file mode 100644 index 000000000..9bc3f1560 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/src/notebook.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from my_jobs_as_code import main\n", + "\n", + "main.get_taxis(spark).show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py new file mode 100644 index 000000000..13e100ee2 --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/tests/main_test.py @@ -0,0 +1,8 @@ +from my_jobs_as_code.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script new file mode 100644 index 000000000..2209aa7ab --- /dev/null +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -0,0 +1,12 @@ +trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output + +cd output/my_jobs_as_code + +# silence uv output because it's non-deterministic +uv sync 2> /dev/null + +# remove version constraint because it always creates a warning on dev builds +cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new +mv databricks.yml.new databricks.yml + +trace $CLI bundle validate -t dev --output json | jq ".resources" diff --git a/acceptance/bundle/variables/arg-repeat/databricks.yml b/acceptance/bundle/variables/arg-repeat/databricks.yml new file mode 100644 index 000000000..377c6cfab --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: arg-repeat + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/arg-repeat/output.txt b/acceptance/bundle/variables/arg-repeat/output.txt new file mode 100644 index 000000000..48bd2033f --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/output.txt @@ -0,0 +1,20 @@ + +>>> errcode $CLI bundle validate --var a=one -o json + +Exit code: 0 +{ + "a": { + "default": "hello", + "value": "one" + } +} + +>>> errcode $CLI bundle validate --var a=one --var a=two +Error: failed to assign two to a: variable has already been assigned value: one + +Name: arg-repeat +Target: default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/arg-repeat/script b/acceptance/bundle/variables/arg-repeat/script new file mode 100644 index 000000000..3e03dbcb1 --- /dev/null +++ b/acceptance/bundle/variables/arg-repeat/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle validate --var a=one -o json | jq .variables +trace errcode $CLI bundle validate --var a=one --var a=two diff --git a/acceptance/bundle/variables/complex-cross-ref/databricks.yml b/acceptance/bundle/variables/complex-cross-ref/databricks.yml new file mode 100644 index 000000000..4459f44df --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/databricks.yml @@ -0,0 +1,12 @@ +bundle: + name: complex-cross-ref + +variables: + a: + default: + a_1: 500 + a_2: ${var.b.b_2} + b: + default: + b_1: ${var.a.a_1} + b_2: 2.5 diff --git a/acceptance/bundle/variables/complex-cross-ref/output.txt b/acceptance/bundle/variables/complex-cross-ref/output.txt new file mode 100644 index 000000000..f1b624d29 --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/output.txt @@ -0,0 +1,22 @@ +{ + "a": { + "default": { + "a_1": 500, + "a_2": 2.5 + }, + "value": { + "a_1": 500, + "a_2": 2.5 + } + }, + "b": { + "default": { + "b_1": 500, + "b_2": 2.5 + }, + "value": { + "b_1": 500, + "b_2": 2.5 + } + } +} diff --git a/acceptance/bundle/variables/complex-cross-ref/script b/acceptance/bundle/variables/complex-cross-ref/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/complex-cross-ref/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/acceptance/bundle/variables/complex-cycle-self/databricks.yml b/acceptance/bundle/variables/complex-cycle-self/databricks.yml new file mode 100644 index 000000000..bb461795c --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/databricks.yml @@ -0,0 +1,7 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle-self/output.txt b/acceptance/bundle/variables/complex-cycle-self/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle-self/script b/acceptance/bundle/variables/complex-cycle-self/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle-self/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-cycle/databricks.yml b/acceptance/bundle/variables/complex-cycle/databricks.yml new file mode 100644 index 000000000..9784a4e25 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: cycle + +variables: + a: + default: + hello: ${var.b} + b: + default: + hello: ${var.a} diff --git a/acceptance/bundle/variables/complex-cycle/output.txt b/acceptance/bundle/variables/complex-cycle/output.txt new file mode 100644 index 000000000..fa80154ca --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/output.txt @@ -0,0 +1,9 @@ +Warning: Detected unresolved variables after 11 resolution rounds + +Name: cycle +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/cycle/default + +Found 1 warning diff --git a/acceptance/bundle/variables/complex-cycle/script b/acceptance/bundle/variables/complex-cycle/script new file mode 100644 index 000000000..72555b332 --- /dev/null +++ b/acceptance/bundle/variables/complex-cycle/script @@ -0,0 +1 @@ +$CLI bundle validate diff --git a/acceptance/bundle/variables/complex-transitive-deep/databricks.yml b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml new file mode 100644 index 000000000..1357c291a --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: complex-transitive + +variables: + catalog: + default: hive_metastore + spark_conf_1: + default: + "spark.databricks.sql.initial.catalog.name": ${var.catalog} + spark_conf: + default: ${var.spark_conf_1} + etl_cluster_config: + type: complex + default: + spark_version: 14.3.x-scala2.12 + runtime_engine: PHOTON + spark_conf: ${var.spark_conf} + +resources: + clusters: + my_cluster: ${var.etl_cluster_config} diff --git a/acceptance/bundle/variables/complex-transitive-deep/output.txt b/acceptance/bundle/variables/complex-transitive-deep/output.txt new file mode 100644 index 000000000..29c41cda5 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/output.txt @@ -0,0 +1,3 @@ +{ + "spark.databricks.sql.initial.catalog.name": "hive_metastore" +} diff --git a/acceptance/bundle/variables/complex-transitive-deep/script b/acceptance/bundle/variables/complex-transitive-deep/script new file mode 100644 index 000000000..52bb08ed4 --- /dev/null +++ b/acceptance/bundle/variables/complex-transitive-deep/script @@ -0,0 +1,2 @@ +# Currently, this incorrectly outputs variable reference instead of resolved value +$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf' diff --git a/acceptance/bundle/variables/cycle/databricks.yml b/acceptance/bundle/variables/cycle/databricks.yml new file mode 100644 index 000000000..b35196671 --- /dev/null +++ b/acceptance/bundle/variables/cycle/databricks.yml @@ -0,0 +1,8 @@ +bundle: + name: cycle + +variables: + a: + default: ${var.b} + b: + default: ${var.a} diff --git a/acceptance/bundle/variables/cycle/output.txt b/acceptance/bundle/variables/cycle/output.txt new file mode 100644 index 000000000..ea9c95cd4 --- /dev/null +++ b/acceptance/bundle/variables/cycle/output.txt @@ -0,0 +1,14 @@ +Error: cycle detected in field resolution: variables.a.default -> var.b -> var.a -> var.b + +{ + "a": { + "default": "${var.b}", + "value": "${var.b}" + }, + "b": { + "default": "${var.a}", + "value": "${var.a}" + } +} + +Exit code: 1 diff --git a/acceptance/bundle/variables/cycle/script b/acceptance/bundle/variables/cycle/script new file mode 100644 index 000000000..0e53f237e --- /dev/null +++ b/acceptance/bundle/variables/cycle/script @@ -0,0 +1 @@ +$CLI bundle validate -o json | jq .variables diff --git a/bundle/apps/slow_deploy_message.go b/bundle/apps/slow_deploy_message.go index 6eda39d81..87275980a 100644 --- a/bundle/apps/slow_deploy_message.go +++ b/bundle/apps/slow_deploy_message.go @@ -14,7 +14,7 @@ type slowDeployMessage struct{} // See https://github.com/databricks/cli/pull/2144 func (v *slowDeployMessage) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { if len(b.Config.Resources.Apps) > 0 { - cmdio.LogString(ctx, "Databricks apps in your bundle can slow initial deployment as they wait for compute provisioning.") + cmdio.LogString(ctx, "Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning.") } return nil diff --git a/bundle/config/mutator/capture_schema_dependency.go b/bundle/config/mutator/capture_schema_dependency.go new file mode 100644 index 000000000..5025c9a0d --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency.go @@ -0,0 +1,100 @@ +package mutator + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/diag" +) + +type captureSchemaDependency struct{} + +// If a user defines a UC schema in the bundle, they can refer to it in DLT pipelines +// or UC Volumes using the `${resources.schemas..name}` syntax. Using this +// syntax allows TF to capture the deploy time dependency this DLT pipeline or UC Volume +// has on the schema and deploy changes to the schema before deploying the pipeline or volume. +// +// This mutator translates any implicit schema references in DLT pipelines or UC Volumes +// to the explicit syntax. +func CaptureSchemaDependency() bundle.Mutator { + return &captureSchemaDependency{} +} + +func (m *captureSchemaDependency) Name() string { + return "CaptureSchemaDependency" +} + +func schemaNameRef(key string) string { + return fmt.Sprintf("${resources.schemas.%s.name}", key) +} + +func findSchema(b *bundle.Bundle, catalogName, schemaName string) (string, *resources.Schema) { + if catalogName == "" || schemaName == "" { + return "", nil + } + + for k, s := range b.Config.Resources.Schemas { + if s != nil && s.CreateSchema != nil && s.CatalogName == catalogName && s.Name == schemaName { + return k, s + } + } + return "", nil +} + +func resolveVolume(v *resources.Volume, b *bundle.Bundle) { + if v == nil || v.CreateVolumeRequestContent == nil { + return + } + schemaK, schema := findSchema(b, v.CatalogName, v.SchemaName) + if schema == nil { + return + } + + v.SchemaName = schemaNameRef(schemaK) +} + +func resolvePipelineSchema(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Schema == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Schema) + if schema == nil { + return + } + + p.Schema = schemaNameRef(schemaK) +} + +func resolvePipelineTarget(p *resources.Pipeline, b *bundle.Bundle) { + if p == nil || p.PipelineSpec == nil { + return + } + if p.Target == "" { + return + } + schemaK, schema := findSchema(b, p.Catalog, p.Target) + if schema == nil { + return + } + p.Target = schemaNameRef(schemaK) +} + +func (m *captureSchemaDependency) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + for _, p := range b.Config.Resources.Pipelines { + // "schema" and "target" have the same semantics in the DLT API but are mutually + // exclusive i.e. only one can be set at a time. If schema is set, the pipeline + // is in direct publishing mode and can write tables to multiple schemas + // (vs target which is limited to a single schema). + resolvePipelineTarget(p, b) + resolvePipelineSchema(p, b) + } + for _, v := range b.Config.Resources.Volumes { + resolveVolume(v, b) + } + return nil +} diff --git a/bundle/config/mutator/capture_schema_dependency_test.go b/bundle/config/mutator/capture_schema_dependency_test.go new file mode 100644 index 000000000..0a94e7748 --- /dev/null +++ b/bundle/config/mutator/capture_schema_dependency_test.go @@ -0,0 +1,277 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/pipelines" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCaptureSchemaDependencyForVolume(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Volumes: map[string]*resources.Volume{ + "volume1": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "foobar", + }, + }, + "volume2": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog2", + SchemaName: "foobar", + }, + }, + "volume3": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "barfoo", + }, + }, + "volume4": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalogX", + SchemaName: "foobar", + }, + }, + "volume5": { + CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{ + CatalogName: "catalog1", + SchemaName: "schemaX", + }, + }, + "nilVolume": nil, + "emptyVolume": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Volumes["volume1"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Volumes["volume2"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Volumes["volume3"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "foobar", b.Config.Resources.Volumes["volume4"].CreateVolumeRequestContent.SchemaName) + assert.Equal(t, "schemaX", b.Config.Resources.Volumes["volume5"].CreateVolumeRequestContent.SchemaName) + + assert.Nil(t, b.Config.Resources.Volumes["nilVolume"]) + assert.Nil(t, b.Config.Resources.Volumes["emptyVolume"].CreateVolumeRequestContent) +} + +func TestCaptureSchemaDependencyForPipelinesWithTarget(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Schema: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Schema: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Schema: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Schema: "", + Name: "whatever", + }, + }, + "nilPipeline": nil, + "emptyPipeline": {}, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Schema) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Schema) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Schema) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Schema) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Schema) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Schema) + + assert.Nil(t, b.Config.Resources.Pipelines["nilPipeline"]) + assert.Nil(t, b.Config.Resources.Pipelines["emptyPipeline"].PipelineSpec) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Target) + } +} + +func TestCaptureSchemaDependencyForPipelinesWithSchema(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "foobar", + }, + }, + "schema2": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog2", + Name: "foobar", + }, + }, + "schema3": { + CreateSchema: &catalog.CreateSchema{ + CatalogName: "catalog1", + Name: "barfoo", + }, + }, + "nilschema": nil, + "emptyschema": {}, + }, + Pipelines: map[string]*resources.Pipeline{ + "pipeline1": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "foobar", + }, + }, + "pipeline2": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog2", + Target: "foobar", + }, + }, + "pipeline3": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "barfoo", + }, + }, + "pipeline4": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalogX", + Target: "foobar", + }, + }, + "pipeline5": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "catalog1", + Target: "schemaX", + }, + }, + "pipeline6": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "foobar", + }, + }, + "pipeline7": { + PipelineSpec: &pipelines.PipelineSpec{ + Catalog: "", + Target: "", + Name: "whatever", + }, + }, + }, + }, + }, + } + + d := bundle.Apply(context.Background(), b, CaptureSchemaDependency()) + require.Nil(t, d) + assert.Equal(t, "${resources.schemas.schema1.name}", b.Config.Resources.Pipelines["pipeline1"].Target) + assert.Equal(t, "${resources.schemas.schema2.name}", b.Config.Resources.Pipelines["pipeline2"].Target) + assert.Equal(t, "${resources.schemas.schema3.name}", b.Config.Resources.Pipelines["pipeline3"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline4"].Target) + assert.Equal(t, "schemaX", b.Config.Resources.Pipelines["pipeline5"].Target) + assert.Equal(t, "foobar", b.Config.Resources.Pipelines["pipeline6"].Target) + assert.Equal(t, "", b.Config.Resources.Pipelines["pipeline7"].Target) + + for _, k := range []string{"pipeline1", "pipeline2", "pipeline3", "pipeline4", "pipeline5", "pipeline6", "pipeline7"} { + assert.Empty(t, b.Config.Resources.Pipelines[k].Schema) + } +} diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 11ac529d0..9aa93791f 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -3,6 +3,7 @@ package mutator import ( "context" "errors" + "fmt" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -13,15 +14,37 @@ import ( "github.com/databricks/cli/libs/dyn/dynvar" ) +/* +For pathological cases, output and time grow exponentially. + +On my laptop, timings for acceptance/bundle/variables/complex-cycle: +rounds time + + 9 0.10s + 10 0.13s + 11 0.27s + 12 0.68s + 13 1.98s + 14 6.28s + 15 21.70s + 16 78.16s +*/ +const maxResolutionRounds = 11 + type resolveVariableReferences struct { - prefixes []string - pattern dyn.Pattern - lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) - skipFn func(dyn.Value) bool + prefixes []string + pattern dyn.Pattern + lookupFn func(dyn.Value, dyn.Path, *bundle.Bundle) (dyn.Value, error) + skipFn func(dyn.Value) bool + extraRounds int } func ResolveVariableReferences(prefixes ...string) bundle.Mutator { - return &resolveVariableReferences{prefixes: prefixes, lookupFn: lookup} + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + } } func ResolveVariableReferencesInLookup() bundle.Mutator { @@ -86,7 +109,36 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) varPath := dyn.NewPath(dyn.Key("var")) var diags diag.Diagnostics + maxRounds := 1 + m.extraRounds + for round := range maxRounds { + hasUpdates, newDiags := m.resolveOnce(b, prefixes, varPath) + + diags = diags.Extend(newDiags) + + if diags.HasError() { + break + } + + if !hasUpdates { + break + } + + if round >= maxRounds-1 { + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: fmt.Sprintf("Detected unresolved variables after %d resolution rounds", round+1), + // Would be nice to include names of the variables there, but that would complicate things more + }) + break + } + } + return diags +} + +func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn.Path, varPath dyn.Path) (bool, diag.Diagnostics) { + var diags diag.Diagnostics + hasUpdates := false err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -129,6 +181,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if m.skipFn != nil && m.skipFn(v) { return dyn.InvalidValue, dynvar.ErrSkipResolution } + hasUpdates = true return m.lookupFn(normalized, path, b) } } @@ -149,5 +202,6 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) if err != nil { diags = diags.Extend(diag.FromErr(err)) } - return diags + + return hasUpdates, diags } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index a2c830be3..1eda578fa 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -136,7 +136,7 @@ func (t *translateContext) rewritePath( } // Local path is relative to the directory the resource was defined in. - localPath := filepath.Join(dir, filepath.FromSlash(input)) + localPath := filepath.Join(dir, input) if interp, ok := t.seen[localPath]; ok { return interp, nil } @@ -151,6 +151,10 @@ func (t *translateContext) rewritePath( return "", fmt.Errorf("path %s is not contained in sync root path", localPath) } + // Normalize paths to separated by forward slashes. + localPath = filepath.ToSlash(localPath) + localRelPath = filepath.ToSlash(localRelPath) + // Convert local path into workspace path via specified function. var interp string switch opts.Mode { @@ -180,9 +184,9 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { - if filepath.Ext(localFullPath) != notebook.ExtensionNone { + if path.Ext(localFullPath) != notebook.ExtensionNone { return "", fmt.Errorf("notebook %s not found", literal) } @@ -198,7 +202,7 @@ func (t *translateContext) translateNotebookPath(ctx context.Context, literal, l // way we can provide a more targeted error message. for _, ext := range extensions { literalWithExt := literal + ext - localRelPathWithExt := filepath.ToSlash(localRelPath + ext) + localRelPathWithExt := localRelPath + ext if _, err := fs.Stat(t.b.SyncRoot, localRelPathWithExt); err == nil { return "", fmt.Errorf(`notebook %s not found. Did you mean %s? Local notebook references are expected to contain one of the following @@ -218,42 +222,42 @@ to contain one of the following file extensions: [%s]`, literal, strings.Join(ex } // Upon import, notebooks are stripped of their extension. - localRelPathNoExt := strings.TrimSuffix(localRelPath, filepath.Ext(localRelPath)) - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPathNoExt)), nil + localRelPathNoExt := strings.TrimSuffix(localRelPath, path.Ext(localRelPath)) + return path.Join(t.remoteRoot, localRelPathNoExt), nil } func (t *translateContext) translateFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", filepath.FromSlash(localFullPath), err) } if nb { return "", ErrIsNotebook{localFullPath} } - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil + return path.Join(t.remoteRoot, localRelPath), nil } func (t *translateContext) translateDirectoryPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(localRelPath) if err != nil { return "", err } if !info.IsDir() { - return "", fmt.Errorf("%s is not a directory", localFullPath) + return "", fmt.Errorf("%s is not a directory", filepath.FromSlash(localFullPath)) } - return path.Join(t.remoteRoot, filepath.ToSlash(localRelPath)), nil + return path.Join(t.remoteRoot, localRelPath), nil } func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { - info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(localRelPath) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a file: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a file: %w", filepath.FromSlash(localFullPath), err) } if info.IsDir() { return "", fmt.Errorf("expected %s to be a file but found a directory", literal) @@ -262,12 +266,12 @@ func (t *translateContext) translateLocalAbsoluteFilePath(ctx context.Context, l } func (t *translateContext) translateLocalAbsoluteDirectoryPath(ctx context.Context, literal, localFullPath, _ string) (string, error) { - info, err := os.Stat(localFullPath) + info, err := os.Stat(filepath.FromSlash(localFullPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("directory %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to determine if %s is a directory: %w", localFullPath, err) + return "", fmt.Errorf("unable to determine if %s is a directory: %w", filepath.FromSlash(localFullPath), err) } if !info.IsDir() { return "", fmt.Errorf("expected %s to be a directory but found a file", literal) @@ -281,7 +285,7 @@ func (t *translateContext) translateLocalRelativePath(ctx context.Context, liter func (t *translateContext) translateLocalRelativeWithPrefixPath(ctx context.Context, literal, localFullPath, localRelPath string) (string, error) { if !strings.HasPrefix(localRelPath, ".") { - localRelPath = "." + string(filepath.Separator) + localRelPath + localRelPath = "./" + localRelPath } return localRelPath, nil } diff --git a/bundle/config/mutator/translate_paths_artifacts_test.go b/bundle/config/mutator/translate_paths_artifacts_test.go index fb402b488..0d1af6156 100644 --- a/bundle/config/mutator/translate_paths_artifacts_test.go +++ b/bundle/config/mutator/translate_paths_artifacts_test.go @@ -46,7 +46,7 @@ func TestTranslatePathsArtifacts_InsideSyncRoot(t *testing.T) { require.NoError(t, diags.Error()) // Assert that the artifact path has been converted to a local absolute path. - assert.Equal(t, lib, b.Config.Artifacts["my_artifact"].Path) + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) } func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { @@ -79,5 +79,5 @@ func TestTranslatePathsArtifacts_OutsideSyncRoot(t *testing.T) { require.NoError(t, diags.Error()) // Assert that the artifact path has been converted to a local absolute path. - assert.Equal(t, lib, b.Config.Artifacts["my_artifact"].Path) + assert.Equal(t, filepath.ToSlash(lib), b.Config.Artifacts["my_artifact"].Path) } diff --git a/bundle/config/mutator/translate_paths_dashboards_test.go b/bundle/config/mutator/translate_paths_dashboards_test.go index 5e4e69f5d..02fba92e0 100644 --- a/bundle/config/mutator/translate_paths_dashboards_test.go +++ b/bundle/config/mutator/translate_paths_dashboards_test.go @@ -48,7 +48,7 @@ func TestTranslatePathsDashboards_FilePathRelativeSubDirectory(t *testing.T) { // Assert that the file path for the dashboard has been converted to its local absolute path. assert.Equal( t, - filepath.Join(dir, "src", "my_dashboard.lvdash.json"), + filepath.ToSlash(filepath.Join(dir, "src", "my_dashboard.lvdash.json")), b.Config.Resources.Dashboards["dashboard"].FilePath, ) } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 493abb8c5..aa6488ab0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -6,7 +6,6 @@ import ( "os" "path/filepath" "runtime" - "strings" "testing" "github.com/databricks/cli/bundle" @@ -226,7 +225,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -251,7 +250,7 @@ func TestTranslatePaths(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar, ) assert.Equal( @@ -362,7 +361,7 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) assert.Equal( t, - filepath.Join("job", "dist", "task.jar"), + "job/dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar, ) assert.Equal( @@ -774,8 +773,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) - assert.Equal(t, strings.Join([]string{".", "job", "dist", "env1.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) - assert.Equal(t, strings.Join([]string{".", "dist", "env2.whl"}, string(os.PathSeparator)), b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) + assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0]) + assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1]) assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2]) assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3]) assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4]) @@ -839,7 +838,7 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { assert.Equal( t, - filepath.Join("variables", "local", "whl.whl"), + "variables/local/whl.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) } @@ -952,34 +951,34 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { // updated to source path assert.Equal( t, - filepath.Join(dir, "my_job_notebook"), + dir+"/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath, ) assert.Equal( t, - filepath.Join(dir, "requirements.txt"), + dir+"/requirements.txt", b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile, ) assert.Equal( t, - filepath.Join(dir, "my_pipeline_notebook"), + dir+"/my_pipeline_notebook", b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path, ) assert.Equal( t, - filepath.Join(dir, "my_python_file.py"), + dir+"/my_python_file.py", b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path, ) // left as is assert.Equal( t, - filepath.Join("dist", "task.whl"), + "dist/task.whl", b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl, ) assert.Equal( @@ -989,7 +988,7 @@ func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) { ) assert.Equal( t, - filepath.Join("dist", "task.jar"), + "dist/task.jar", b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar, ) assert.Equal( diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index 27c4b16cd..677b8fc10 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" diff --git a/bundle/internal/tf/schema/resource_external_location.go b/bundle/internal/tf/schema/resource_external_location.go index da28271bc..72411f4dc 100644 --- a/bundle/internal/tf/schema/resource_external_location.go +++ b/bundle/internal/tf/schema/resource_external_location.go @@ -13,8 +13,13 @@ type ResourceExternalLocationEncryptionDetails struct { type ResourceExternalLocation struct { AccessPoint string `json:"access_point,omitempty"` + BrowseOnly bool `json:"browse_only,omitempty"` Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name"` + Fallback bool `json:"fallback,omitempty"` ForceDestroy bool `json:"force_destroy,omitempty"` ForceUpdate bool `json:"force_update,omitempty"` Id string `json:"id,omitempty"` @@ -24,6 +29,8 @@ type ResourceExternalLocation struct { Owner string `json:"owner,omitempty"` ReadOnly bool `json:"read_only,omitempty"` SkipValidation bool `json:"skip_validation,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` Url string `json:"url"` EncryptionDetails *ResourceExternalLocationEncryptionDetails `json:"encryption_details,omitempty"` } diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index 1f89dc64d..7dd3f9210 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.62.0" +const ProviderVersion = "1.63.0" func NewRoot() *Root { return &Root{ diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 1a1ccd47b..b59ce9f89 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -130,13 +130,13 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( bundle.Seq( + apps.SlowDeployMessage(), bundle.LogString("Deploying resources..."), terraform.Apply(), ), bundle.Seq( terraform.StatePush(), terraform.Load(), - apps.SlowDeployMessage(), apps.InterpolateVariables(), apps.UploadConfig(), metadata.Compute(), diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 0328cc2ff..c5b875196 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -66,11 +66,6 @@ func Initialize() bundle.Mutator { "workspace", "variables", ), - mutator.ResolveVariableReferences( - "bundle", - "workspace", - "variables", - ), mutator.MergeJobClusters(), mutator.MergeJobParameters(), @@ -78,6 +73,8 @@ func Initialize() bundle.Mutator { mutator.MergePipelineClusters(), mutator.MergeApps(), + mutator.CaptureSchemaDependency(), + // Provide permission config errors & warnings after initializing all variables permissions.PermissionDiagnostics(), mutator.SetRunAs(), diff --git a/bundle/tests/path_translation_test.go b/bundle/tests/path_translation_test.go deleted file mode 100644 index 05702d2a2..000000000 --- a/bundle/tests/path_translation_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package config_tests - -import ( - "context" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config/mutator" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPathTranslationFallback(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - require.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 6) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationFallbackError(t *testing.T) { - b := loadTarget(t, "./path_translation/fallback", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} - -func TestPathTranslationNominal(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "development") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.NoError(t, diags.Error()) - - j := b.Config.Resources.Jobs["my_job"] - assert.Len(t, j.Tasks, 8) - - assert.Equal(t, "notebook_example", filepath.ToSlash(j.Tasks[0].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[0].NotebookTask.NotebookPath)) - - assert.Equal(t, "spark_python_example", filepath.ToSlash(j.Tasks[1].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[1].SparkPythonTask.PythonFile)) - - assert.Equal(t, "dbt_example", filepath.ToSlash(j.Tasks[2].TaskKey)) - assert.Equal(t, "src/dbt_project", filepath.ToSlash(j.Tasks[2].DbtTask.ProjectDirectory)) - - assert.Equal(t, "sql_example", filepath.ToSlash(j.Tasks[3].TaskKey)) - assert.Equal(t, "src/sql.sql", filepath.ToSlash(j.Tasks[3].SqlTask.File.Path)) - - assert.Equal(t, "python_wheel_example", filepath.ToSlash(j.Tasks[4].TaskKey)) - assert.Equal(t, "dist/wheel1.whl", filepath.ToSlash(j.Tasks[4].Libraries[0].Whl)) - assert.Equal(t, "dist/wheel2.whl", filepath.ToSlash(j.Tasks[4].Libraries[1].Whl)) - - assert.Equal(t, "spark_jar_example", filepath.ToSlash(j.Tasks[5].TaskKey)) - assert.Equal(t, "target/jar1.jar", filepath.ToSlash(j.Tasks[5].Libraries[0].Jar)) - assert.Equal(t, "target/jar2.jar", filepath.ToSlash(j.Tasks[5].Libraries[1].Jar)) - - assert.Equal(t, "for_each_notebook_example", filepath.ToSlash(j.Tasks[6].TaskKey)) - assert.Equal(t, "src/notebook", filepath.ToSlash(j.Tasks[6].ForEachTask.Task.NotebookTask.NotebookPath)) - - assert.Equal(t, "for_each_spark_python_example", filepath.ToSlash(j.Tasks[7].TaskKey)) - assert.Equal(t, "src/file.py", filepath.ToSlash(j.Tasks[7].ForEachTask.Task.SparkPythonTask.PythonFile)) - - p := b.Config.Resources.Pipelines["my_pipeline"] - assert.Len(t, p.Libraries, 4) - - assert.Equal(t, "src/file1.py", filepath.ToSlash(p.Libraries[0].File.Path)) - assert.Equal(t, "src/notebook1", filepath.ToSlash(p.Libraries[1].Notebook.Path)) - assert.Equal(t, "src/file2.py", filepath.ToSlash(p.Libraries[2].File.Path)) - assert.Equal(t, "src/notebook2", filepath.ToSlash(p.Libraries[3].Notebook.Path)) -} - -func TestPathTranslationNominalError(t *testing.T) { - b := loadTarget(t, "./path_translation/nominal", "error") - - m := mutator.TranslatePaths() - diags := bundle.Apply(context.Background(), b, m) - assert.ErrorContains(t, diags.Error(), `notebook this value is overridden not found`) -} diff --git a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/environment_key/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py index 73d045afb..ea918ce2d 100644 --- a/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py +++ b/bundle/tests/python_wheel/python_wheel_no_artifact_notebook/my_test_code/__main__.py @@ -7,10 +7,10 @@ import sys def main(): # This method will print the provided arguments - print('Hello from my func') - print('Got arguments:') + print("Hello from my func") + print("Got arguments:") print(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go deleted file mode 100644 index 0f553ac3d..000000000 --- a/bundle/tests/relative_path_translation_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package config_tests - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestRelativePathTranslationDefault(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "default") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file1.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file1.py", t1.SparkPythonTask.PythonFile) -} - -func TestRelativePathTranslationOverride(t *testing.T) { - b, diags := initializeTarget(t, "./relative_path_translation", "override") - require.NoError(t, diags.Error()) - - t0 := b.Config.Resources.Jobs["job"].Tasks[0] - assert.Equal(t, "/Workspace/remote/src/file2.py", t0.SparkPythonTask.PythonFile) - t1 := b.Config.Resources.Jobs["job"].Tasks[1] - assert.Equal(t, "/Workspace/remote/src/file2.py", t1.SparkPythonTask.PythonFile) -} diff --git a/bundle/tests/relative_path_with_includes_test.go b/bundle/tests/relative_path_with_includes_test.go index 8efac0039..7249cac1f 100644 --- a/bundle/tests/relative_path_with_includes_test.go +++ b/bundle/tests/relative_path_with_includes_test.go @@ -17,8 +17,8 @@ func TestRelativePathsWithIncludes(t *testing.T) { diags := bundle.Apply(context.Background(), b, m) assert.NoError(t, diags.Error()) - assert.Equal(t, filepath.Join(b.SyncRootPath, "artifact_a"), b.Config.Artifacts["test_a"].Path) - assert.Equal(t, filepath.Join(b.SyncRootPath, "subfolder", "artifact_b"), b.Config.Artifacts["test_b"].Path) + assert.Equal(t, b.SyncRootPath+"/artifact_a", b.Config.Artifacts["test_a"].Path) + assert.Equal(t, b.SyncRootPath+"/subfolder/artifact_b", b.Config.Artifacts["test_b"].Path) assert.ElementsMatch( t, @@ -37,6 +37,6 @@ func TestRelativePathsWithIncludes(t *testing.T) { b.Config.Sync.Exclude, ) - assert.Equal(t, filepath.Join("dist", "job_a.whl"), b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) - assert.Equal(t, filepath.Join("subfolder", "dist", "job_b.whl"), b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "dist/job_a.whl", b.Config.Resources.Jobs["job_a"].Tasks[0].Libraries[0].Whl) + assert.Equal(t, "subfolder/dist/job_b.whl", b.Config.Resources.Jobs["job_b"].Tasks[0].Libraries[0].Whl) } diff --git a/cmd/bundle/generate/app.go b/cmd/bundle/generate/app.go index 819b62b38..9dbd4fe46 100644 --- a/cmd/bundle/generate/app.go +++ b/cmd/bundle/generate/app.go @@ -36,8 +36,8 @@ func NewGenerateAppCommand() *cobra.Command { cmd.Flags().StringVar(&appName, "existing-app-name", "", `App name to generate config for`) cmd.MarkFlagRequired("existing-app-name") - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join("resources"), `Directory path where the output bundle config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join("src", "app"), `Directory path where the app files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Directory path where the output bundle config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src/app", `Directory path where the app files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/dashboard.go b/cmd/bundle/generate/dashboard.go index fa3c91b2a..d56d246c2 100644 --- a/cmd/bundle/generate/dashboard.go +++ b/cmd/bundle/generate/dashboard.go @@ -441,8 +441,8 @@ func NewGenerateDashboardCommand() *cobra.Command { cmd.Flags().MarkHidden("existing-dashboard-id") // Output flags. - cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "./resources", `directory to write the configuration to`) - cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "./src", `directory to write the dashboard representation to`) + cmd.Flags().StringVarP(&d.resourceDir, "resource-dir", "d", "resources", `directory to write the configuration to`) + cmd.Flags().StringVarP(&d.dashboardDir, "dashboard-dir", "s", "src", `directory to write the dashboard representation to`) cmd.Flags().BoolVarP(&d.force, "force", "f", false, `force overwrite existing files in the output directory`) // Exactly one of the lookup flags must be provided. diff --git a/cmd/bundle/generate/job.go b/cmd/bundle/generate/job.go index 827d270e5..d97891cd5 100644 --- a/cmd/bundle/generate/job.go +++ b/cmd/bundle/generate/job.go @@ -32,13 +32,8 @@ func NewGenerateJobCommand() *cobra.Command { cmd.Flags().Int64Var(&jobId, "existing-job-id", 0, `Job ID of the job to generate config for`) cmd.MarkFlagRequired("existing-job-id") - wd, err := os.Getwd() - if err != nil { - wd = "." - } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/generate/pipeline.go b/cmd/bundle/generate/pipeline.go index 863b0b2f7..1d2c345d6 100644 --- a/cmd/bundle/generate/pipeline.go +++ b/cmd/bundle/generate/pipeline.go @@ -32,13 +32,8 @@ func NewGeneratePipelineCommand() *cobra.Command { cmd.Flags().StringVar(&pipelineId, "existing-pipeline-id", "", `ID of the pipeline to generate config for`) cmd.MarkFlagRequired("existing-pipeline-id") - wd, err := os.Getwd() - if err != nil { - wd = "." - } - - cmd.Flags().StringVarP(&configDir, "config-dir", "d", filepath.Join(wd, "resources"), `Dir path where the output config will be stored`) - cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", filepath.Join(wd, "src"), `Dir path where the downloaded files will be stored`) + cmd.Flags().StringVarP(&configDir, "config-dir", "d", "resources", `Dir path where the output config will be stored`) + cmd.Flags().StringVarP(&sourceDir, "source-dir", "s", "src", `Dir path where the downloaded files will be stored`) cmd.Flags().BoolVarP(&force, "force", "f", false, `Force overwrite existing files in the output directory`) cmd.RunE = func(cmd *cobra.Command, args []string) error { diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 687c141ec..6b93fd1e5 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -59,6 +59,11 @@ var nativeTemplates = []nativeTemplate{ hidden: true, description: "The default PyDABs template", }, + { + name: "experimental-jobs-as-code", + hidden: true, + description: "Jobs as code template (experimental)", + }, { name: customTemplate, description: "Bring your own template", diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py index 6873257d5..a162da342 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -1 +1 @@ -print(f'setting up important infrastructure') +print(f"setting up important infrastructure") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py index 769ee73ee..e5866d6ae 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -2,26 +2,34 @@ import os, sys, json payload = json.loads(sys.argv[1]) -if 'echo' == payload['command']: - json.dump({ - 'command': payload['command'], - 'flags': payload['flags'], - 'env': {k:v for k,v in os.environ.items()} - }, sys.stdout) +if "echo" == payload["command"]: + json.dump( + { + "command": payload["command"], + "flags": payload["flags"], + "env": {k: v for k, v in os.environ.items()}, + }, + sys.stdout, + ) sys.exit(0) -if 'table' == payload['command']: +if "table" == payload["command"]: sys.stderr.write("some intermediate info\n") - json.dump({'records': [ - {'key': 'First', 'value': 'Second'}, - {'key': 'Third', 'value': 'Fourth'}, - ]}, sys.stdout) + json.dump( + { + "records": [ + {"key": "First", "value": "Second"}, + {"key": "Third", "value": "Fourth"}, + ] + }, + sys.stdout, + ) sys.exit(0) -print(f'host is {os.environ["DATABRICKS_HOST"]}') +print(f"host is {os.environ['DATABRICKS_HOST']}") -print(f'[{payload["command"]}] command flags are {payload["flags"]}') +print(f"[{payload['command']}] command flags are {payload['flags']}") -answer = input('What is your name? ') +answer = input("What is your name? ") -print(f'Hello, {answer}!') +print(f"Hello, {answer}!") diff --git a/cmd/workspace/apps/overrides.go b/cmd/workspace/apps/overrides.go new file mode 100644 index 000000000..e14068717 --- /dev/null +++ b/cmd/workspace/apps/overrides.go @@ -0,0 +1,28 @@ +package apps + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/apps" + "github.com/spf13/cobra" +) + +func listOverride(listCmd *cobra.Command, listReq *apps.ListAppsRequest) { + listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "Name"}} {{header "Url"}} {{header "ComputeStatus"}} {{header "DeploymentStatus"}}`) + listCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.Name | green}} {{.Url}} {{if .ComputeStatus}}{{if eq .ComputeStatus.State "ACTIVE"}}{{green "%s" .ComputeStatus.State }}{{else}}{{blue "%s" .ComputeStatus.State}}{{end}}{{end}} {{if .ActiveDeployment}}{{if eq .ActiveDeployment.Status.State "SUCCEEDED"}}{{green "%s" .ActiveDeployment.Status.State }}{{else}}{{blue "%s" .ActiveDeployment.Status.State}}{{end}}{{end}} + {{end}}`) +} + +func listDeploymentsOverride(listDeploymentsCmd *cobra.Command, listDeploymentsReq *apps.ListAppDeploymentsRequest) { + listDeploymentsCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` + {{header "DeploymentId"}} {{header "State"}} {{header "CreatedAt"}}`) + listDeploymentsCmd.Annotations["template"] = cmdio.Heredoc(` + {{range .}}{{.DeploymentId}} {{if eq .Status.State "SUCCEEDED"}}{{green "%s" .Status.State }}{{else}}{{blue "%s" .Status.State}}{{end}} {{.CreateTime}} + {{end}}`) +} + +func init() { + listOverrides = append(listOverrides, listOverride) + listDeploymentsOverrides = append(listDeploymentsOverrides, listDeploymentsOverride) +} diff --git a/integration/bundle/apps_test.go b/integration/bundle/apps_test.go index 2da764ed8..23cd784be 100644 --- a/integration/bundle/apps_test.go +++ b/integration/bundle/apps_test.go @@ -6,8 +6,10 @@ import ( "testing" "github.com/databricks/cli/integration/internal/acc" + "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/testdiff" "github.com/databricks/databricks-sdk-go/service/apps" "github.com/google/uuid" "github.com/stretchr/testify/require" @@ -49,7 +51,31 @@ func TestDeployBundleWithApp(t *testing.T) { } }) - deployBundle(t, ctx, root) + ctx, replacements := testdiff.WithReplacementsMap(ctx) + replacements.Set(uniqueId, "$UNIQUE_PRJ") + + user, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + require.NotNil(t, user) + testdiff.PrepareReplacementsUser(t, replacements, *user) + testdiff.PrepareReplacementsWorkspaceClient(t, replacements, wt.W) + testdiff.PrepareReplacementsUUID(t, replacements) + testdiff.PrepareReplacementsNumber(t, replacements) + testdiff.PrepareReplacementsTemporaryDirectory(t, replacements) + + testutil.Chdir(t, root) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "validate"}, + testutil.TestData("testdata/apps/bundle_validate.txt"), + ) + testcli.AssertOutput( + t, + ctx, + []string{"bundle", "deploy", "--force-lock", "--auto-approve"}, + testutil.TestData("testdata/apps/bundle_deploy.txt"), + ) // App should exists after bundle deployment app, err := wt.W.Apps.Get(ctx, apps.GetAppRequest{Name: appId}) diff --git a/integration/bundle/testdata/apps/bundle_deploy.txt b/integration/bundle/testdata/apps/bundle_deploy.txt new file mode 100644 index 000000000..b077f327d --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_deploy.txt @@ -0,0 +1,5 @@ +Uploading bundle files to /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ/files... +Note: Databricks apps included in this bundle may increase initial deployment time due to compute provisioning. +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/integration/bundle/testdata/apps/bundle_validate.txt b/integration/bundle/testdata/apps/bundle_validate.txt new file mode 100644 index 000000000..dc9016a0f --- /dev/null +++ b/integration/bundle/testdata/apps/bundle_validate.txt @@ -0,0 +1,7 @@ +Name: basic +Target: default +Workspace: + User: $USERNAME + Path: /Workspace/Users/$USERNAME/.bundle/$UNIQUE_PRJ + +Validation OK! diff --git a/internal/testcli/runner.go b/internal/testcli/runner.go index d32fa3947..f462f44fc 100644 --- a/internal/testcli/runner.go +++ b/internal/testcli/runner.go @@ -39,6 +39,8 @@ type Runner struct { StderrLines <-chan string errch <-chan error + + Verbose bool } func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string { @@ -139,7 +141,9 @@ func (r *Runner) RunBackground() { go func() { err := root.Execute(ctx, cli) if err != nil { - r.Logf("Error running command: %s", err) + if r.Verbose { + r.Logf("Error running command: %s", err) + } } // Close pipes to signal EOF. @@ -154,7 +158,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stdout.Bytes())) for scanner.Scan() { - r.Logf("[databricks stdout]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stdout]: %s", scanner.Text()) + } } } @@ -162,7 +168,9 @@ func (r *Runner) RunBackground() { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(r.stderr.Bytes())) for scanner.Scan() { - r.Logf("[databricks stderr]: %s", scanner.Text()) + if r.Verbose { + r.Logf("[databricks stderr]: %s", scanner.Text()) + } } } @@ -196,18 +204,24 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { cli.SetErr(&stderr) cli.SetArgs(r.args) - r.Logf(" args: %s", strings.Join(r.args, ", ")) + if r.Verbose { + r.Logf(" args: %s", strings.Join(r.args, ", ")) + } err := root.Execute(ctx, cli) if err != nil { - r.Logf(" error: %s", err) + if r.Verbose { + r.Logf(" error: %s", err) + } } if stdout.Len() > 0 { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stdout.Bytes())) for scanner.Scan() { - r.Logf("stdout: %s", scanner.Text()) + if r.Verbose { + r.Logf("stdout: %s", scanner.Text()) + } } } @@ -215,7 +229,9 @@ func (r *Runner) Run() (bytes.Buffer, bytes.Buffer, error) { // Make a copy of the buffer such that it remains "unread". scanner := bufio.NewScanner(bytes.NewBuffer(stderr.Bytes())) for scanner.Scan() { - r.Logf("stderr: %s", scanner.Text()) + if r.Verbose { + r.Logf("stderr: %s", scanner.Text()) + } } } @@ -275,8 +291,9 @@ func NewRunner(t testutil.TestingT, ctx context.Context, args ...string) *Runner return &Runner{ TestingT: t, - ctx: ctx, - args: args, + ctx: ctx, + args: args, + Verbose: true, } } diff --git a/libs/notebook/testdata/.ruff.toml b/libs/notebook/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/notebook/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/sync/testdata/.ruff.toml b/libs/sync/testdata/.ruff.toml new file mode 100644 index 000000000..43f86042e --- /dev/null +++ b/libs/sync/testdata/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.ipynb"] diff --git a/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json new file mode 100644 index 000000000..00d59af5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/databricks_template_schema.json @@ -0,0 +1,28 @@ +{ + "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!", + "properties": { + "project_name": { + "type": "string", + "default": "jobs_as_code_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", + "order": 1, + "pattern": "^[A-Za-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." + }, + "include_notebook": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "order": 2 + }, + "include_python": { + "type": "string", + "default": "yes", + "enum": ["yes", "no"], + "description": "Include a stub (sample) Python package in '{{.project_name}}/src'", + "order": 3 + } + }, + "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." +} diff --git a/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl new file mode 100644 index 000000000..7d0c88e7d --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/library/versions.tmpl @@ -0,0 +1,7 @@ +{{define "latest_lts_dbr_version" -}} + 15.4.x-scala2.12 +{{- end}} + +{{define "latest_lts_db_connect_version_spec" -}} + >=15.4,<15.5 +{{- end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl new file mode 100644 index 000000000..2f8e8ae3e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/__preamble.tmpl @@ -0,0 +1,30 @@ +# Preamble + +This file only template directives; it is skipped for the actual output. + +{{skip "__preamble"}} + +# TODO add DLT support, placeholder for now +{{$notDLT := true }} +{{$notNotebook := not (eq .include_notebook "yes")}} +{{$notPython := not (eq .include_python "yes")}} + +{{if $notPython}} + {{skip "{{.project_name}}/src/{{.project_name}}"}} + {{skip "{{.project_name}}/tests/main_test.py"}} +{{end}} + +{{if $notDLT}} + {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}} +{{end}} + +{{if $notNotebook}} + {{skip "{{.project_name}}/src/notebook.ipynb"}} +{{end}} + +{{if (and $notDLT $notNotebook $notPython)}} + {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}} +{{else}} + {{skip "{{.project_name}}/resources/.gitkeep"}} +{{end}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore new file mode 100644 index 000000000..0dab7f499 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 000000000..497ce3723 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,60 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the "Jobs as code" template. + +## Prerequisites + +1. Install Databricks CLI 0.238 or later. + See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). + +2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). + We use uv to create a virtual environment and install the required dependencies. + +3. Authenticate to your Databricks workspace if you have not done so already: + ``` + $ databricks configure + ``` + +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. + {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + {{- end}} + +5. For documentation on the Databricks Asset Bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. + +## Deploy and run jobs + +1. Create a new virtual environment and install the required dependencies: + ``` + $ uv sync + ``` + +2. To deploy the bundle to the development target: + ``` + $ databricks bundle deploy --target dev + ``` + + *(Note that "dev" is the default target, so the `--target` parameter is optional here.)* + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +3. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + + Note that the default job from the template has a schedule that runs every day + (defined in resources/{{.project_name}}_job.py). The schedule + is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( + https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). + +4. To run a job: + ``` + $ databricks bundle run + ``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 000000000..758ec3f16 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,51 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + uuid: {{bundle_uuid}} + databricks_cli_version: ">= 0.238.0" + +experimental: + python: + # Activate virtual environment before loading resources defined in Python. + # If disabled, defaults to using the Python interpreter available in the current shell. + venv_path: .venv + # Functions called to load resources defined in Python. See resources/__init__.py + resources: + - "resources:load_resources" + +{{ if .include_python -}} +artifacts: + default: + type: whl + path: . + # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.) + # to ensure that changes to wheel package are picked up when used on all-purpose clusters + build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build + +{{ end -}} +include: + - resources/*.yml + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + workspace: + host: {{workspace_host}} + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + run_as: + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 000000000..ee9570302 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else "/Workspace" + path + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl new file mode 100644 index 000000000..cee0d8946 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/pyproject.toml.tmpl @@ -0,0 +1,57 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "{{.project_name}}" +requires-python = ">=3.10" +description = "wheel file based on {{.project_name}}" + +# Dependencies in case the output wheel file is used as a library dependency. +# For defining dependencies, when this package is used in Databricks, see: +# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +# +# Example: +# dependencies = [ +# "requests==x.y.z", +# ] +dependencies = [ +] + +# see setup.py +dynamic = ["version"] + +{{ if .include_python -}} +[project.entry-points.packages] +main = "{{.project_name}}.main:main" + +{{ end -}} + +[tool.setuptools] +{{ if .include_python -}} +py-modules = ["resources", "{{.project_name}}"] + +{{ else }} +py-modules = ["resources"] + +{{ end -}} +[tool.uv] +## Dependencies for local development +dev-dependencies = [ + "databricks-bundles==0.7.0", + + ## Add code completion support for DLT + # "databricks-dlt", + + ## databricks-connect can be used to run parts of this project locally. + ## See https://docs.databricks.com/dev-tools/databricks-connect.html. + ## + ## Uncomment line below to install a version of db-connect that corresponds to + ## the Databricks Runtime version used for this project. + # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}", +] + +override-dependencies = [ + # pyspark package conflicts with 'databricks-connect' + "pyspark; sys_platform == 'never'", +] diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py new file mode 100644 index 000000000..fbcb9dc5f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/__init__.py @@ -0,0 +1,16 @@ +from databricks.bundles.core import ( + Bundle, + Resources, + load_resources_from_current_package_module, +) + + +def load_resources(bundle: Bundle) -> Resources: + """ + 'load_resources' function is referenced in databricks.yml and is responsible for loading + bundle resources defined in Python code. This function is called by Databricks CLI during + bundle deployment. After deployment, this function is not used. + """ + + # the default implementation loads all Python files in 'resources' directory + return load_resources_from_current_package_module() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl new file mode 100644 index 000000000..7c7a0d33f --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_job.py.tmpl @@ -0,0 +1,108 @@ +{{$include_dlt := "no" -}} +from databricks.bundles.jobs import Job + +""" +The main job for {{.project_name}}. + +{{- /* Clarify what this job is for for DLT-only users. */}} +{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +This job runs {{.project_name}}_pipeline on a schedule. +{{end -}} +""" + + +{{.project_name}}_job = Job.from_dict( + { + "name": "{{.project_name}}_job", + "trigger": { + # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger + "periodic": { + "interval": 1, + "unit": "DAYS", + }, + }, + {{- if not is_service_principal}} + "email_notifications": { + "on_failure": [ + "{{user_name}}", + ], + }, + {{else}} + {{- end -}} + "tasks": [ + {{- if eq .include_notebook "yes" -}} + {{- "\n " -}} + { + "task_key": "notebook_task", + "job_cluster_key": "job_cluster", + "notebook_task": { + "notebook_path": "src/notebook.ipynb", + }, + }, + {{- end -}} + {{- if (eq $include_dlt "yes") -}} + {{- "\n " -}} + { + "task_key": "refresh_pipeline", + {{- if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "pipeline_task": { + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} + "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}", + }, + }, + {{- end -}} + {{- if (eq .include_python "yes") -}} + {{- "\n " -}} + { + "task_key": "main_task", + {{- if (eq $include_dlt "yes") }} + "depends_on": [ + { + "task_key": "refresh_pipeline", + }, + ], + {{- else if (eq .include_notebook "yes" )}} + "depends_on": [ + { + "task_key": "notebook_task", + }, + ], + {{- end}} + "job_cluster_key": "job_cluster", + "python_wheel_task": { + "package_name": "{{.project_name}}", + "entry_point": "main", + }, + "libraries": [ + # By default we just include the .whl file generated for the {{.project_name}} package. + # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + # for more information on how to add other libraries. + { + "whl": "dist/*.whl", + }, + ], + }, + {{- end -}} + {{""}} + ], + "job_clusters": [ + { + "job_cluster_key": "job_cluster", + "new_cluster": { + "spark_version": "{{template "latest_lts_dbr_version"}}", + "node_type_id": "{{smallest_node_type}}", + "autoscale": { + "min_workers": 1, + "max_workers": 4, + }, + }, + }, + ], + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl new file mode 100644 index 000000000..c8579ae65 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/resources/{{.project_name}}_pipeline.py.tmpl @@ -0,0 +1,24 @@ +from databricks.bundles.pipelines import Pipeline + +{{.project_name}}_pipeline = Pipeline.from_dict( + { + "name": "{{.project_name}}_pipeline", + "target": "{{.project_name}}_${bundle.target}", + {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + "catalog": "catalog_name", + {{- else}} + "catalog": "{{default_catalog}}", + {{- end}} + "libraries": [ + { + "notebook": { + "path": "src/dlt_pipeline.ipynb", + }, + }, + ], + "configuration": { + "bundle.sourcePath": "${workspace.file_path}/src", + }, + } +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md new file mode 100644 index 000000000..e6cfb81b4 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 000000000..19c9d0ebe --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,18 @@ +""" +setup.py configuration script describing how to build and package this project. + +This file is primarily used by the setuptools library and typically should not +be executed directly. See README.md for how to deploy, test, and run +the {{.project_name}} project. +""" + +import os + +from setuptools import setup + +local_version = os.getenv("LOCAL_VERSION") +version = "0.0.1" + +setup( + version=f"{version}+{local_version}" if local_version else version, +) diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl new file mode 100644 index 000000000..629106dbf --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# DLT pipeline\n", + "\n", + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "# Import DLT and src/{{.project_name}}\n", + "import dlt\n", + "import sys\n", + "\n", + "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n", + "from pyspark.sql.functions import expr\n", + "from {{.project_name}} import main" + {{else}} + "import dlt\n", + "from pyspark.sql.functions import expr\n", + "from pyspark.sql import SparkSession\n", + "\n", + "spark = SparkSession.builder.getOrCreate()" + {{end -}} + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "@dlt.view\n", + "def taxi_raw():\n", + " return main.get_taxis(spark)\n", + {{else}} + "@dlt.view\n", + "def taxi_raw():\n", + " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n", + {{end -}} + "\n", + "\n", + "@dlt.table\n", + "def filtered_taxis():\n", + " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "dlt_pipeline", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 000000000..6782a053b --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + {{- if (eq .include_python "yes") }} + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/__init__.py.tmpl new file mode 100644 index 000000000..e69de29bb diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 000000000..5ae344c7e --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession, DataFrame + + +def get_taxis(spark: SparkSession) -> DataFrame: + return spark.read.table("samples.nyctaxi.trips") + + +# Create a new Databricks Connect session. If this fails, +# check that you have configured Databricks Connect correctly. +# See https://docs.databricks.com/dev-tools/databricks-connect.html. +def get_spark() -> SparkSession: + try: + from databricks.connect import DatabricksSession + + return DatabricksSession.builder.getOrCreate() + except ImportError: + return SparkSession.builder.getOrCreate() + + +def main(): + get_taxis(get_spark()).show(5) + + +if __name__ == "__main__": + main() diff --git a/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 000000000..6f89fca53 --- /dev/null +++ b/libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,8 @@ +from {{.project_name}}.main import get_taxis, get_spark + +# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml + + +def test_main(): + taxis = get_taxis(get_spark()) + assert taxis.count() > 5 diff --git a/libs/testdiff/testdiff.go b/libs/testdiff/testdiff.go index fef1d5ae2..f65adf7f7 100644 --- a/libs/testdiff/testdiff.go +++ b/libs/testdiff/testdiff.go @@ -17,18 +17,20 @@ func UnifiedDiff(filename1, filename2, s1, s2 string) string { return fmt.Sprint(gotextdiff.ToUnified(filename1, filename2, s1, edits)) } -func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) { +func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out string) bool { t.Helper() if len(out) < 1000 && len(expected) < 1000 { // This shows full strings + diff which could be useful when debugging newlines - assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) + return assert.Equal(t, expected, out, "%s vs %s", filename1, filename2) } else { // only show diff for large texts diff := UnifiedDiff(filename1, filename2, expected, out) if diff != "" { - t.Errorf("Diff:\n" + diff) + t.Error("Diff:\n" + diff) + return false } } + return true } func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) {