Merge branch 'main' of github.com:databricks/cli into feat/config-reference-doc-autogen

commit f04a841e15
Author: Ilya Kuznetsov
Date:   2025-01-20 12:26:20 +01:00
312 changed files with 5582 additions and 1170 deletions

View File

@@ -1 +1 @@
- a6a317df8327c9b1e5cb59a03a42ffa2aabeef6d
+ 779817ed8d63031f5ea761fbd25ee84f38feec0d

View File

@@ -140,9 +140,9 @@ func new{{.PascalName}}() *cobra.Command {
{{- end}}
{{$method := .}}
{{ if not .IsJsonOnly }}
- {{range $request.Fields -}}
+ {{range .AllFields -}}
{{- if not .Required -}}
- {{if .Entity.IsObject }}// TODO: complex arg: {{.Name}}
+ {{if .Entity.IsObject}}{{if not (eq . $method.RequestBodyField) }}// TODO: complex arg: {{.Name}}{{end}}
{{else if .Entity.IsAny }}// TODO: any: {{.Name}}
{{else if .Entity.ArrayValue }}// TODO: array: {{.Name}}
{{else if .Entity.MapValue }}// TODO: map via StringToStringVar: {{.Name}}

View File

@@ -60,6 +60,12 @@ jobs:
      - name: Install uv
        uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0
+     - name: Run ruff
+       uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1
+       with:
+         version: "0.9.1"
+         args: "format --check"
      - name: Set go env
        run: |
          echo "GOPATH=$(go env GOPATH)" >> $GITHUB_ENV

.gitignore (vendored)
View File

@@ -20,6 +20,7 @@ dist/
*.log
coverage.txt
+ coverage-acceptance.txt
__pycache__
*.pyc
@@ -31,3 +32,4 @@ __pycache__
.vscode/tasks.json
.databricks
+ .ruff_cache

View File

@@ -15,12 +15,20 @@ linters:
    - intrange
    - mirror
    - perfsprint
+   - unconvert
linters-settings:
  govet:
    enable-all: true
    disable:
      - fieldalignment
      - shadow
+   settings:
+     printf:
+       funcs:
+         - (github.com/databricks/cli/internal/testutil.TestingT).Infof
+         - (github.com/databricks/cli/internal/testutil.TestingT).Errorf
+         - (github.com/databricks/cli/internal/testutil.TestingT).Fatalf
+         - (github.com/databricks/cli/internal/testutil.TestingT).Skipf
  gofmt:
    rewrite-rules:
      - pattern: 'a[b:len(a)]'
@@ -41,6 +49,8 @@ linters-settings:
    disable:
      # good check, but we have too many assert.(No)?Errorf? so excluding for now
      - require-error
+  copyloopvar:
+    check-alias: true
issues:
  exclude-dirs-use-default: false # recommended by docs https://golangci-lint.run/usage/false-positives/
  max-issues-per-linter: 1000

View File

@ -1,5 +1,49 @@
# Version changelog
## [Release] Release v0.239.0
### New feature announcement
#### Databricks Apps support
You can now manage Databricks Apps using DABs by defining an `app` resource in your bundle configuration.
For more information see Databricks documentation https://docs.databricks.com/en/dev-tools/bundles/resources.html#app
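For illustration, a minimal bundle configuration with an `app` resource might look like the sketch below; the field names `description` and `source_code_path` are assumptions based on the documentation linked above, not text from this release.
```yaml
# databricks.yml — illustrative sketch of a Databricks App resource in a bundle
bundle:
  name: app-example

resources:
  apps:
    my_app:
      name: my-app
      description: "App managed by Databricks Asset Bundles"  # assumed field
      source_code_path: ./app                                 # assumed field
```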
#### Referencing complex variables in complex variables
You can now reference complex variables within other complex variables.
For more details see https://github.com/databricks/cli/pull/2157
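As a sketch of what this enables (using the standard `type: complex` variable syntax; the specific keys below are illustrative), one complex variable can now be referenced from inside another:
```yaml
variables:
  cluster_spec:
    type: complex
    default:
      spark_version: 15.4.x-scala2.12
      node_type_id: i3.xlarge
  job_cluster:
    type: complex
    default:
      job_cluster_key: default
      new_cluster: ${var.cluster_spec}  # complex variable referenced within another complex variable
```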
CLI:
* Filter out system clusters in cluster picker ([#2131](https://github.com/databricks/cli/pull/2131)).
* Add command line flags for fields that are not in the API request body ([#2155](https://github.com/databricks/cli/pull/2155)).
Bundles:
* Added support for Databricks Apps in DABs ([#1928](https://github.com/databricks/cli/pull/1928)).
* Allow artifact path to be located outside the sync root ([#2128](https://github.com/databricks/cli/pull/2128)).
* Retry app deployment if there is an active deployment in progress ([#2153](https://github.com/databricks/cli/pull/2153)).
* Resolve variables in a loop ([#2164](https://github.com/databricks/cli/pull/2164)).
* Improve resolution of complex variables within complex variables ([#2157](https://github.com/databricks/cli/pull/2157)).
* Added output message to warn about slower deployments with apps ([#2161](https://github.com/databricks/cli/pull/2161)).
* Patch references to UC schemas to capture dependencies automatically ([#1989](https://github.com/databricks/cli/pull/1989)).
* Format default-python template ([#2110](https://github.com/databricks/cli/pull/2110)).
* Encourage the use of root_path in production to ensure single deployment ([#1712](https://github.com/databricks/cli/pull/1712)).
* Log warnings to stderr for "bundle validate -o json" ([#2109](https://github.com/databricks/cli/pull/2109)).
API Changes:
* Changed `databricks account federation-policy update` command with new required argument order.
* Changed `databricks account service-principal-federation-policy update` command with new required argument order.
OpenAPI commit 779817ed8d63031f5ea761fbd25ee84f38feec0d (2025-01-08)
Dependency updates:
* Upgrade TF provider to 1.63.0 ([#2162](https://github.com/databricks/cli/pull/2162)).
* Bump golangci-lint version to v1.63.4 from v1.63.1 ([#2114](https://github.com/databricks/cli/pull/2114)).
* Bump astral-sh/setup-uv from 4 to 5 ([#2116](https://github.com/databricks/cli/pull/2116)).
* Bump golang.org/x/oauth2 from 0.24.0 to 0.25.0 ([#2080](https://github.com/databricks/cli/pull/2080)).
* Bump github.com/hashicorp/hc-install from 0.9.0 to 0.9.1 ([#2079](https://github.com/databricks/cli/pull/2079)).
* Bump golang.org/x/term from 0.27.0 to 0.28.0 ([#2078](https://github.com/databricks/cli/pull/2078)).
* Bump github.com/databricks/databricks-sdk-go from 0.54.0 to 0.55.0 ([#2126](https://github.com/databricks/cli/pull/2126)).
## [Release] Release v0.238.0

Bundles:

View File

@@ -1,4 +1,4 @@
- default: build
+ default: vendor fmt lint
PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... .
@@ -14,6 +14,7 @@ lintcheck:
# formatting/goimports will not be applied by 'make lint'. However, it will be applied by 'make fmt'.
# If you need to ensure that formatting & imports are always fixed, do "make fmt lint"
fmt:
+ ruff format -q
golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./...
test:
@@ -25,6 +26,17 @@ cover:
showcover:
go tool cover -html=coverage.txt
+ acc-cover:
+ rm -fr ./acceptance/build/cover/
+ CLI_GOCOVERDIR=build/cover go test ./acceptance
+ rm -fr ./acceptance/build/cover-merged/
+ mkdir -p acceptance/build/cover-merged/
+ go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/
+ go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt
+ acc-showcover:
+ go tool cover -html=coverage-acceptance.txt
build: vendor
go build -mod vendor
@@ -48,4 +60,4 @@ integration:
integration-short:
$(INTEGRATION) -short
- .PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short docs
+ .PHONY: lint lintcheck fmt test cover showcover build snapshot vendor schema integration integration-short acc-cover acc-showcover docs

View File

@@ -1,12 +1,14 @@
package acceptance_test
import (
+ "context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
+ "regexp"
"runtime"
"slices"
"sort"
@@ -17,6 +19,7 @@ import (
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/testdiff"
+ "github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/require"
)
@@ -38,32 +41,65 @@ func TestAccept(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
- execPath := BuildCLI(t, cwd)
+ coverDir := os.Getenv("CLI_GOCOVERDIR")
+ if coverDir != "" {
+ require.NoError(t, os.MkdirAll(coverDir, os.ModePerm))
+ coverDir, err = filepath.Abs(coverDir)
+ require.NoError(t, err)
+ t.Logf("Writing coverage to %s", coverDir)
+ }
+ execPath := BuildCLI(t, cwd, coverDir)
// $CLI is what test scripts are using
t.Setenv("CLI", execPath)
// Make helper scripts available
t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH")))
- server := StartServer(t)
- AddHandlers(server)
- // Redirect API access to local server:
- t.Setenv("DATABRICKS_HOST", fmt.Sprintf("http://127.0.0.1:%d", server.Port))
- t.Setenv("DATABRICKS_TOKEN", "dapi1234")
- homeDir := t.TempDir()
- // Do not read user's ~/.databrickscfg
- t.Setenv(env.HomeEnvVar(), homeDir)
repls := testdiff.ReplacementsContext{}
repls.Set(execPath, "$CLI")
+ tempHomeDir := t.TempDir()
+ repls.Set(tempHomeDir, "$TMPHOME")
+ t.Logf("$TMPHOME=%v", tempHomeDir)
+ // Prevent CLI from downloading terraform in each test:
+ t.Setenv("DATABRICKS_TF_EXEC_PATH", tempHomeDir)
+ ctx := context.Background()
+ cloudEnv := os.Getenv("CLOUD_ENV")
+ if cloudEnv == "" {
+ server := StartServer(t)
+ AddHandlers(server)
+ // Redirect API access to local server:
+ t.Setenv("DATABRICKS_HOST", server.URL)
+ t.Setenv("DATABRICKS_TOKEN", "dapi1234")
+ homeDir := t.TempDir()
+ // Do not read user's ~/.databrickscfg
+ t.Setenv(env.HomeEnvVar(), homeDir)
+ }
+ workspaceClient, err := databricks.NewWorkspaceClient()
+ require.NoError(t, err)
+ user, err := workspaceClient.CurrentUser.Me(ctx)
+ require.NoError(t, err)
+ require.NotNil(t, user)
+ testdiff.PrepareReplacementsUser(t, &repls, *user)
+ testdiff.PrepareReplacementsWorkspaceClient(t, &repls, workspaceClient)
+ testdiff.PrepareReplacementsUUID(t, &repls)
testDirs := getTests(t)
require.NotEmpty(t, testDirs)
for _, dir := range testDirs {
- t.Run(dir, func(t *testing.T) {
+ testName := strings.ReplaceAll(dir, "\\", "/")
+ t.Run(testName, func(t *testing.T) {
t.Parallel()
- runTest(t, dir, repls)
+ runTest(t, dir, coverDir, repls)
})
}
}
@@ -88,7 +124,7 @@ func getTests(t *testing.T) []string {
return testDirs
}
- func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) {
+ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) {
var tmpDir string
var err error
if KeepTmp {
@@ -111,70 +147,95 @@ func runTest(t *testing.T, dir string, repls testdiff.ReplacementsContext) {
args := []string{"bash", "-euo", "pipefail", EntryPointScript}
cmd := exec.Command(args[0], args[1:]...)
+ if coverDir != "" {
+ // Creating individual coverage directory for each test, because writing to the same one
+ // results in sporadic failures like this one (only if tests are running in parallel):
+ // +error: coverage meta-data emit failed: writing ... rename .../tmp.covmeta.b3f... .../covmeta.b3f2c...: no such file or directory
+ coverDir = filepath.Join(coverDir, strings.ReplaceAll(dir, string(os.PathSeparator), "--"))
+ err := os.MkdirAll(coverDir, os.ModePerm)
+ require.NoError(t, err)
+ cmd.Env = append(os.Environ(), "GOCOVERDIR="+coverDir)
+ }
+ // Write combined output to a file
+ out, err := os.Create(filepath.Join(tmpDir, "output.txt"))
+ require.NoError(t, err)
+ cmd.Stdout = out
+ cmd.Stderr = out
cmd.Dir = tmpDir
- outB, err := cmd.CombinedOutput()
- out := formatOutput(string(outB), err)
- out = repls.Replace(out)
- doComparison(t, filepath.Join(dir, "output.txt"), "script output", out)
- for key := range outputs {
- if key == "output.txt" {
- // handled above
- continue
- }
- pathNew := filepath.Join(tmpDir, key)
- newValBytes, err := os.ReadFile(pathNew)
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir)
- } else {
- t.Errorf("%s: could not read: %s", key, err)
- }
- continue
- }
- pathExpected := filepath.Join(dir, key)
- newVal := repls.Replace(string(newValBytes))
- doComparison(t, pathExpected, pathNew, newVal)
+ err = cmd.Run()
+ // Include exit code in output (if non-zero)
+ formatOutput(out, err)
+ require.NoError(t, out.Close())
+ // Compare expected outputs
+ for relPath := range outputs {
+ doComparison(t, repls, dir, tmpDir, relPath)
}
// Make sure there are not unaccounted for new files
- files, err := os.ReadDir(tmpDir)
+ files, err := ListDir(t, tmpDir)
require.NoError(t, err)
- for _, f := range files {
- name := f.Name()
- if _, ok := inputs[name]; ok {
+ for _, relPath := range files {
+ if _, ok := inputs[relPath]; ok {
continue
}
- if _, ok := outputs[name]; ok {
+ if _, ok := outputs[relPath]; ok {
continue
}
- t.Errorf("Unexpected output: %s", f)
- if strings.HasPrefix(name, "out") {
+ if strings.HasPrefix(relPath, "out") {
// We have a new file starting with "out"
// Show the contents & support overwrite mode for it:
- pathNew := filepath.Join(tmpDir, name)
- newVal := testutil.ReadFile(t, pathNew)
- newVal = repls.Replace(newVal)
- doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal)
+ doComparison(t, repls, dir, tmpDir, relPath)
}
}
}
- func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) {
- valueNew = testdiff.NormalizeNewlines(valueNew)
- valueExpected := string(readIfExists(t, pathExpected))
- valueExpected = testdiff.NormalizeNewlines(valueExpected)
- testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew)
- if testdiff.OverwriteMode {
- if valueNew != "" {
- t.Logf("Overwriting: %s", pathExpected)
- testutil.WriteFile(t, pathExpected, valueNew)
- } else {
- t.Logf("Removing: %s", pathExpected)
- _ = os.Remove(pathExpected)
- }
- }
- }
+ func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string) {
+ pathRef := filepath.Join(dirRef, relPath)
+ pathNew := filepath.Join(dirNew, relPath)
+ bufRef, okRef := readIfExists(t, pathRef)
+ bufNew, okNew := readIfExists(t, pathNew)
+ if !okRef && !okNew {
+ t.Errorf("Both files are missing: %s, %s", pathRef, pathNew)
+ return
+ }
+ valueRef := testdiff.NormalizeNewlines(string(bufRef))
+ valueNew := testdiff.NormalizeNewlines(string(bufNew))
+ // Apply replacements to the new value only.
+ // The reference value is stored after applying replacements.
+ valueNew = repls.Replace(valueNew)
+ // The test did not produce an expected output file.
+ if okRef && !okNew {
+ t.Errorf("Missing output file: %s", relPath)
+ testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew)
+ if testdiff.OverwriteMode {
+ t.Logf("Removing output file: %s", relPath)
+ require.NoError(t, os.Remove(pathRef))
+ }
+ return
+ }
+ // The test produced an unexpected output file.
+ if !okRef && okNew {
+ t.Errorf("Unexpected output file: %s", relPath)
+ testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew)
+ if testdiff.OverwriteMode {
+ t.Logf("Writing output file: %s", relPath)
+ testutil.WriteFile(t, pathRef, valueNew)
+ }
+ return
+ }
+ // Compare the reference and new values.
+ equal := testdiff.AssertEqualTexts(t, pathRef, pathNew, valueRef, valueNew)
+ if !equal && testdiff.OverwriteMode {
+ t.Logf("Overwriting existing output file: %s", relPath)
+ testutil.WriteFile(t, pathRef, valueNew)
+ }
+ }
@@ -182,17 +243,22 @@ func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) {
// Note, cleanups are not executed if main script fails; that's not a huge issue, since it runs it temp dir.
func readMergedScriptContents(t *testing.T, dir string) string {
scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript))
+ // Wrap script contents in a subshell such that changing the working
+ // directory only affects the main script and not cleanup.
+ scriptContents = "(\n" + scriptContents + ")\n"
prepares := []string{}
cleanups := []string{}
for {
- x := readIfExists(t, filepath.Join(dir, CleanupScript))
- if len(x) > 0 {
+ x, ok := readIfExists(t, filepath.Join(dir, CleanupScript))
+ if ok {
cleanups = append(cleanups, string(x))
}
- x = readIfExists(t, filepath.Join(dir, PrepareScript))
- if len(x) > 0 {
+ x, ok = readIfExists(t, filepath.Join(dir, PrepareScript))
+ if ok {
prepares = append(prepares, string(x))
}
@@ -210,14 +276,30 @@ func readMergedScriptContents(t *testing.T, dir string) string {
return strings.Join(prepares, "\n")
}
- func BuildCLI(t *testing.T, cwd string) string {
+ func BuildCLI(t *testing.T, cwd, coverDir string) string {
execPath := filepath.Join(cwd, "build", "databricks")
if runtime.GOOS == "windows" {
execPath += ".exe"
}
start := time.Now()
- args := []string{"go", "build", "-mod", "vendor", "-o", execPath}
+ args := []string{
+ "go", "build",
+ "-mod", "vendor",
+ "-o", execPath,
+ }
+ if coverDir != "" {
+ args = append(args, "-cover")
+ }
+ if runtime.GOOS == "windows" {
+ // Get this error on my local Windows:
+ // error obtaining VCS status: exit status 128
+ // Use -buildvcs=false to disable VCS stamping.
+ args = append(args, "-buildvcs=false")
+ }
cmd := exec.Command(args[0], args[1:]...)
cmd.Dir = ".."
out, err := cmd.CombinedOutput()
@@ -252,29 +334,28 @@ func copyFile(src, dst string) error {
return err
}
- func formatOutput(out string, err error) string {
+ func formatOutput(w io.Writer, err error) {
if err == nil {
- return out
+ return
}
if exiterr, ok := err.(*exec.ExitError); ok {
exitCode := exiterr.ExitCode()
- out += fmt.Sprintf("\nExit code: %d\n", exitCode)
+ fmt.Fprintf(w, "\nExit code: %d\n", exitCode)
} else {
- out += fmt.Sprintf("\nError: %s\n", err)
+ fmt.Fprintf(w, "\nError: %s\n", err)
}
- return out
}
- func readIfExists(t *testing.T, path string) []byte {
+ func readIfExists(t *testing.T, path string) ([]byte, bool) {
data, err := os.ReadFile(path)
if err == nil {
- return data
+ return data, true
}
if !errors.Is(err, os.ErrNotExist) {
t.Fatalf("%s: %s", path, err)
}
- return []byte{}
+ return []byte{}, false
}
func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
@@ -289,8 +370,10 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
return err
}
- if strings.HasPrefix(name, "out") {
- outputs[relPath] = true
+ if strings.HasPrefix(relPath, "out") {
+ if !info.IsDir() {
+ outputs[relPath] = true
+ }
return nil
} else {
inputs[relPath] = true
@@ -309,3 +392,47 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
return copyFile(path, destPath)
})
}
func ListDir(t *testing.T, src string) ([]string, error) {
// exclude folders in .gitignore from comparison
ignored := []string{
"\\.ruff_cache",
"\\.venv",
".*\\.egg-info",
"__pycache__",
// depends on uv version
"uv.lock",
}
var files []string
err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
for _, ignoredFolder := range ignored {
if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
return filepath.SkipDir
}
}
return nil
} else {
for _, ignoredFolder := range ignored {
if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
return nil
}
}
}
relPath, err := filepath.Rel(src, path)
if err != nil {
return err
}
files = append(files, relPath)
return nil
})
return files, err
}

View File

@@ -4,6 +4,7 @@ Helper to sort blocks in text file. A block is a set of lines separated from oth
This is to workaround non-determinism in the output.
"""
import sys
blocks = []
@@ -11,10 +12,10 @@ blocks = []
for line in sys.stdin:
    if not line.strip():
        if blocks and blocks[-1]:
-             blocks.append('')
+             blocks.append("")
        continue
    if not blocks:
-         blocks.append('')
+         blocks.append("")
    blocks[-1] += line
blocks.sort()

View File

@ -0,0 +1,21 @@
>>> $CLI bundle deploy --help
Deploy bundle
Usage:
databricks bundle deploy [flags]
Flags:
--auto-approve Skip interactive approvals that might be required for deployment.
-c, --cluster-id string Override cluster in the deployment with the given cluster ID.
--fail-on-active-runs Fail if there are running jobs or pipelines in the deployment.
--force Force-override Git branch validation.
--force-lock Force acquisition of deployment lock.
-h, --help help for deploy
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle deploy --help

View File

@ -0,0 +1,22 @@
>>> $CLI bundle deployment --help
Deployment related commands
Usage:
databricks bundle deployment [command]
Available Commands:
bind Bind bundle-defined resources to existing resources
unbind Unbind bundle-defined resources from its managed remote resource
Flags:
-h, --help help for deployment
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"
Use "databricks bundle deployment [command] --help" for more information about a command.

View File

@ -0,0 +1 @@
trace $CLI bundle deployment --help

View File

@ -0,0 +1,18 @@
>>> $CLI bundle destroy --help
Destroy deployed bundle resources
Usage:
databricks bundle destroy [flags]
Flags:
--auto-approve Skip interactive approvals for deleting resources and files
--force-lock Force acquisition of deployment lock.
-h, --help help for destroy
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle destroy --help

View File

@ -0,0 +1,24 @@
>>> $CLI bundle generate dashboard --help
Generate configuration for a dashboard
Usage:
databricks bundle generate dashboard [flags]
Flags:
-s, --dashboard-dir string directory to write the dashboard representation to (default "src")
--existing-id string ID of the dashboard to generate configuration for
--existing-path string workspace path of the dashboard to generate configuration for
-f, --force force overwrite existing files in the output directory
-h, --help help for dashboard
--resource string resource key of dashboard to watch for changes
-d, --resource-dir string directory to write the configuration to (default "resources")
--watch watch for changes to the dashboard and update the configuration
Global Flags:
--debug enable debug logging
--key string resource key to use for the generated configuration
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle generate dashboard --help

View File

@ -0,0 +1,21 @@
>>> $CLI bundle generate job --help
Generate bundle configuration for a job
Usage:
databricks bundle generate job [flags]
Flags:
-d, --config-dir string Dir path where the output config will be stored (default "resources")
--existing-job-id int Job ID of the job to generate config for
-f, --force Force overwrite existing files in the output directory
-h, --help help for job
-s, --source-dir string Dir path where the downloaded files will be stored (default "src")
Global Flags:
--debug enable debug logging
--key string resource key to use for the generated configuration
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle generate job --help

View File

@ -0,0 +1,21 @@
>>> $CLI bundle generate pipeline --help
Generate bundle configuration for a pipeline
Usage:
databricks bundle generate pipeline [flags]
Flags:
-d, --config-dir string Dir path where the output config will be stored (default "resources")
--existing-pipeline-id string ID of the pipeline to generate config for
-f, --force Force overwrite existing files in the output directory
-h, --help help for pipeline
-s, --source-dir string Dir path where the downloaded files will be stored (default "src")
Global Flags:
--debug enable debug logging
--key string resource key to use for the generated configuration
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle generate pipeline --help

View File

@ -0,0 +1,25 @@
>>> $CLI bundle generate --help
Generate bundle configuration
Usage:
databricks bundle generate [command]
Available Commands:
app Generate bundle configuration for a Databricks app
dashboard Generate configuration for a dashboard
job Generate bundle configuration for a job
pipeline Generate bundle configuration for a pipeline
Flags:
-h, --help help for generate
--key string resource key to use for the generated configuration
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"
Use "databricks bundle generate [command] --help" for more information about a command.

View File

@ -0,0 +1 @@
trace $CLI bundle generate --help

View File

@ -0,0 +1,31 @@
>>> $CLI bundle init --help
Initialize using a bundle template.
TEMPLATE_PATH optionally specifies which template to use. It can be one of the following:
- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows
- default-sql: The default SQL template for .sql files that run with Databricks SQL
- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)
- mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)
- a local file system path with a template directory
- a Git repository URL, e.g. https://github.com/my/repository
See https://docs.databricks.com/en/dev-tools/bundles/templates.html for more information on templates.
Usage:
databricks bundle init [TEMPLATE_PATH] [flags]
Flags:
--branch string Git branch to use for template initialization
--config-file string JSON file containing key value pairs of input parameters required for template initialization.
-h, --help help for init
--output-dir string Directory to write the initialized template to.
--tag string Git tag to use for template initialization
--template-dir string Directory path within a Git repository containing the template.
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle init --help

View File

@ -0,0 +1,17 @@
>>> $CLI bundle open --help
Open a resource in the browser
Usage:
databricks bundle open [flags]
Flags:
--force-pull Skip local cache and load the state from the remote workspace
-h, --help help for open
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle open --help

View File

@ -0,0 +1,57 @@
>>> $CLI bundle run --help
Run the job or pipeline identified by KEY.
The KEY is the unique identifier of the resource to run. In addition to
customizing the run using any of the available flags, you can also specify
keyword or positional arguments as shown in these examples:
databricks bundle run my_job -- --key1 value1 --key2 value2
Or:
databricks bundle run my_job -- value1 value2 value3
If the specified job uses job parameters or the job has a notebook task with
parameters, the first example applies and flag names are mapped to the
parameter names.
If the specified job does not use job parameters and the job has a Python file
task or a Python wheel task, the second example applies.
Usage:
databricks bundle run [flags] KEY
Job Flags:
--params stringToString comma separated k=v pairs for job parameters (default [])
Job Task Flags:
Note: please prefer use of job-level parameters (--param) over task-level parameters.
For more information, see https://docs.databricks.com/en/workflows/jobs/create-run-jobs.html#pass-parameters-to-a-databricks-job-task
--dbt-commands strings A list of commands to execute for jobs with DBT tasks.
--jar-params strings A list of parameters for jobs with Spark JAR tasks.
--notebook-params stringToString A map from keys to values for jobs with notebook tasks. (default [])
--pipeline-params stringToString A map from keys to values for jobs with pipeline tasks. (default [])
--python-named-params stringToString A map from keys to values for jobs with Python wheel tasks. (default [])
--python-params strings A list of parameters for jobs with Python tasks.
--spark-submit-params strings A list of parameters for jobs with Spark submit tasks.
--sql-params stringToString A map from keys to values for jobs with SQL tasks. (default [])
Pipeline Flags:
--full-refresh strings List of tables to reset and recompute.
--full-refresh-all Perform a full graph reset and recompute.
--refresh strings List of tables to update.
--refresh-all Perform a full graph update.
--validate-only Perform an update to validate graph correctness.
Flags:
-h, --help help for run
--no-wait Don't wait for the run to complete.
--restart Restart the run if it is already running.
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle run --help

View File

@ -0,0 +1,16 @@
>>> $CLI bundle schema --help
Generate JSON Schema for bundle configuration
Usage:
databricks bundle schema [flags]
Flags:
-h, --help help for schema
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle schema --help

View File

@ -0,0 +1,17 @@
>>> $CLI bundle summary --help
Summarize resources deployed by this bundle
Usage:
databricks bundle summary [flags]
Flags:
--force-pull Skip local cache and load the state from the remote workspace
-h, --help help for summary
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle summary --help

View File

@ -0,0 +1,19 @@
>>> $CLI bundle sync --help
Synchronize bundle tree to the workspace
Usage:
databricks bundle sync [flags]
Flags:
--full perform full synchronization (default is incremental)
-h, --help help for sync
--interval duration file system polling interval (for --watch) (default 1s)
--output type type of the output format
--watch watch local file system for changes
Global Flags:
--debug enable debug logging
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle sync --help

View File

@ -0,0 +1,16 @@
>>> $CLI bundle validate --help
Validate configuration
Usage:
databricks bundle validate [flags]
Flags:
-h, --help help for validate
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"

View File

@ -0,0 +1 @@
trace $CLI bundle validate --help

View File

@ -0,0 +1,33 @@
>>> $CLI bundle --help
Databricks Asset Bundles let you express data/AI/analytics projects as code.
Online documentation: https://docs.databricks.com/en/dev-tools/bundles/index.html
Usage:
databricks bundle [command]
Available Commands:
deploy Deploy bundle
deployment Deployment related commands
destroy Destroy deployed bundle resources
generate Generate bundle configuration
init Initialize using a bundle template
open Open a resource in the browser
run Run a job or pipeline update
schema Generate JSON Schema for bundle configuration
summary Summarize resources deployed by this bundle
sync Synchronize bundle tree to the workspace
validate Validate configuration
Flags:
-h, --help help for bundle
--var strings set values for variables defined in bundle config. Example: --var="foo=bar"
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
Use "databricks bundle [command] --help" for more information about a command.

View File

@ -0,0 +1 @@
trace $CLI bundle --help

View File

@@ -4,7 +4,7 @@
"foo": {
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
@@ -32,7 +32,7 @@
"foo": {
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",

View File

@@ -4,7 +4,7 @@
"foo": {
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/development/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
@@ -31,8 +31,8 @@
Name: override_job_cluster
Target: development
Workspace:
- User: tester@databricks.com
+ User: $USERNAME
- Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/development
+ Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/development
Validation OK!
@@ -41,7 +41,7 @@ Validation OK!
"foo": {
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
@@ -68,7 +68,7 @@ Validation OK!
Name: override_job_cluster
Target: staging
Workspace:
- User: tester@databricks.com
+ User: $USERNAME
- Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_cluster/staging
+ Path: /Workspace/Users/$USERNAME/.bundle/override_job_cluster/staging
Validation OK!

View File

@@ -69,8 +69,8 @@ Error: file ./test1.py not found
Name: override_job_tasks
Target: staging
Workspace:
- User: tester@databricks.com
+ User: $USERNAME
- Path: /Workspace/Users/tester@databricks.com/.bundle/override_job_tasks/staging
+ Path: /Workspace/Users/$USERNAME/.bundle/override_job_tasks/staging
Found 1 error

View File

@@ -21,7 +21,7 @@ Warning: expected map, found string
Name: merge-string-map
Target: dev
Workspace:
- User: tester@databricks.com
+ User: $USERNAME
- Path: /Workspace/Users/tester@databricks.com/.bundle/merge-string-map/dev
+ Path: /Workspace/Users/$USERNAME/.bundle/merge-string-map/dev
Found 1 warning

View File

@@ -14,7 +14,7 @@
],
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/development/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/development/state/metadata.json"
},
"name": "job",
"permissions": []
@@ -36,7 +36,7 @@
],
"deployment": {
"kind": "BUNDLE",
- "metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/staging/state/metadata.json"
+ "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/override_pipeline_cluster/staging/state/metadata.json"
},
"name": "job",
"permissions": []

View File

@@ -1,5 +1,5 @@
bundle:
- name: path_translation_nominal
+ name: fallback
include:
  - "resources/*.yml"

View File

@ -0,0 +1,67 @@
[
{
"job_cluster_key": "default",
"notebook_task": {
"notebook_path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook"
},
"task_key": "notebook_example"
},
{
"job_cluster_key": "default",
"spark_python_task": {
"python_file": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file.py"
},
"task_key": "spark_python_example"
},
{
"dbt_task": {
"commands": [
"dbt run",
"dbt run"
],
"project_directory": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/dbt_project"
},
"job_cluster_key": "default",
"task_key": "dbt_example"
},
{
"job_cluster_key": "default",
"sql_task": {
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/sql.sql"
},
"warehouse_id": "cafef00d"
},
"task_key": "sql_example"
},
{
"job_cluster_key": "default",
"libraries": [
{
"whl": "dist/wheel1.whl"
},
{
"whl": "dist/wheel2.whl"
}
],
"python_wheel_task": {
"package_name": "my_package"
},
"task_key": "python_wheel_example"
},
{
"job_cluster_key": "default",
"libraries": [
{
"jar": "target/jar1.jar"
},
{
"jar": "target/jar2.jar"
}
],
"spark_jar_task": {
"main_class_name": "com.example.Main"
},
"task_key": "spark_jar_example"
}
]

View File

@ -0,0 +1,22 @@
[
{
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file1.py"
}
},
{
"notebook": {
"path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook1"
}
},
{
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/file2.py"
}
},
{
"notebook": {
"path": "/Workspace/Users/$USERNAME/.bundle/fallback/development/files/src/notebook2"
}
}
]

View File

@ -0,0 +1,18 @@
>>> $CLI bundle validate -t development -o json
Exit code: 0
>>> $CLI bundle validate -t error
Error: notebook this value is overridden not found. Local notebook references are expected
to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb]
Name: fallback
Target: error
Workspace:
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/fallback/error
Found 1 error
Exit code: 1

View File

@@ -4,33 +4,45 @@ resources:
      name: "placeholder"
      tasks:
        - task_key: notebook_example
+         job_cluster_key: default
          notebook_task:
            notebook_path: "this value is overridden"
        - task_key: spark_python_example
+         job_cluster_key: default
          spark_python_task:
            python_file: "this value is overridden"
        - task_key: dbt_example
+         job_cluster_key: default
          dbt_task:
            project_directory: "this value is overridden"
            commands:
              - "dbt run"
        - task_key: sql_example
+         job_cluster_key: default
          sql_task:
            file:
              path: "this value is overridden"
            warehouse_id: cafef00d
        - task_key: python_wheel_example
+         job_cluster_key: default
          python_wheel_task:
            package_name: my_package
          libraries:
            - whl: ../dist/wheel1.whl
        - task_key: spark_jar_example
+         job_cluster_key: default
          spark_jar_task:
            main_class_name: com.example.Main
          libraries:
            - jar: ../target/jar1.jar
+     # Include a job cluster for completeness
+     job_clusters:
+       - job_cluster_key: default
+         new_cluster:
+           spark_version: 15.4.x-scala2.12

View File

@ -0,0 +1,10 @@
errcode trace $CLI bundle validate -t development -o json > output.tmp.json
# Capture job tasks
jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json
# Capture pipeline libraries
jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json
# Expect failure for the "error" target
errcode trace $CLI bundle validate -t error

View File

@ -0,0 +1 @@
rm -f output.tmp.json

View File

@@ -1,5 +1,5 @@
bundle:
- name: path_translation_fallback
+ name: nominal
include:
  - "resources/*.yml"

View File

@ -0,0 +1,89 @@
[
{
"job_cluster_key": "default",
"notebook_task": {
"notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook"
},
"task_key": "notebook_example"
},
{
"job_cluster_key": "default",
"spark_python_task": {
"python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py"
},
"task_key": "spark_python_example"
},
{
"dbt_task": {
"commands": [
"dbt run",
"dbt run"
],
"project_directory": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/dbt_project"
},
"job_cluster_key": "default",
"task_key": "dbt_example"
},
{
"job_cluster_key": "default",
"sql_task": {
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/sql.sql"
},
"warehouse_id": "cafef00d"
},
"task_key": "sql_example"
},
{
"job_cluster_key": "default",
"libraries": [
{
"whl": "dist/wheel1.whl"
},
{
"whl": "dist/wheel2.whl"
}
],
"python_wheel_task": {
"package_name": "my_package"
},
"task_key": "python_wheel_example"
},
{
"job_cluster_key": "default",
"libraries": [
{
"jar": "target/jar1.jar"
},
{
"jar": "target/jar2.jar"
}
],
"spark_jar_task": {
"main_class_name": "com.example.Main"
},
"task_key": "spark_jar_example"
},
{
"for_each_task": {
"task": {
"notebook_task": {
"notebook_path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook"
}
}
},
"job_cluster_key": "default",
"task_key": "for_each_notebook_example"
},
{
"for_each_task": {
"task": {
"job_cluster_key": "default",
"spark_python_task": {
"python_file": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file.py"
}
}
},
"task_key": "for_each_spark_python_example"
}
]

View File

@ -0,0 +1,22 @@
[
{
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file1.py"
}
},
{
"notebook": {
"path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook1"
}
},
{
"file": {
"path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/file2.py"
}
},
{
"notebook": {
"path": "/Workspace/Users/$USERNAME/.bundle/nominal/development/files/src/notebook2"
}
}
]

View File

@ -0,0 +1,18 @@
>>> $CLI bundle validate -t development -o json
Exit code: 0
>>> $CLI bundle validate -t error
Error: notebook this value is overridden not found. Local notebook references are expected
to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb]
Name: nominal
Target: error
Workspace:
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/nominal/error
Found 1 error
Exit code: 1

View File

@@ -4,38 +4,45 @@ resources:
      name: "placeholder"
      tasks:
        - task_key: notebook_example
+         job_cluster_key: default
          notebook_task:
            notebook_path: "this value is overridden"
        - task_key: spark_python_example
+         job_cluster_key: default
          spark_python_task:
            python_file: "this value is overridden"
        - task_key: dbt_example
+         job_cluster_key: default
          dbt_task:
            project_directory: "this value is overridden"
            commands:
              - "dbt run"
        - task_key: sql_example
+         job_cluster_key: default
          sql_task:
            file:
              path: "this value is overridden"
            warehouse_id: cafef00d
        - task_key: python_wheel_example
+         job_cluster_key: default
          python_wheel_task:
            package_name: my_package
          libraries:
            - whl: ../dist/wheel1.whl
        - task_key: spark_jar_example
+         job_cluster_key: default
          spark_jar_task:
            main_class_name: com.example.Main
          libraries:
            - jar: ../target/jar1.jar
        - task_key: for_each_notebook_example
+         job_cluster_key: default
          for_each_task:
            task:
              notebook_task:
@@ -44,5 +51,12 @@ resources:
        - task_key: for_each_spark_python_example
          for_each_task:
            task:
+             job_cluster_key: default
              spark_python_task:
                python_file: "this value is overridden"
+     # Include a job cluster for completeness
+     job_clusters:
+       - job_cluster_key: default
+         new_cluster:
+           spark_version: 15.4.x-scala2.12

View File

@ -0,0 +1,10 @@
errcode trace $CLI bundle validate -t development -o json > output.tmp.json
# Capture job tasks
jq '.resources.jobs.my_job.tasks' output.tmp.json > output.job.json
# Capture pipeline libraries
jq '.resources.pipelines.my_pipeline.libraries' output.tmp.json > output.pipeline.json
# Expect failure for the "error" target
errcode trace $CLI bundle validate -t error

View File

@ -0,0 +1 @@
rm -f output.tmp.json

View File

@ -0,0 +1,6 @@
{
"paths": [
"/Workspace/remote/src/file1.py",
"/Workspace/remote/src/file1.py"
]
}

View File

@ -0,0 +1,6 @@
{
"paths": [
"/Workspace/remote/src/file2.py",
"/Workspace/remote/src/file2.py"
]
}

View File

@ -0,0 +1,4 @@
>>> $CLI bundle validate -t default -o json
>>> $CLI bundle validate -t override -o json

View File

@@ -3,12 +3,20 @@ resources:
    job:
      tasks:
        - task_key: local
+         job_cluster_key: default
          spark_python_task:
            python_file: ../src/file1.py
        - task_key: variable_reference
+         job_cluster_key: default
          spark_python_task:
            # Note: this is a pure variable reference yet needs to persist the location
            # of the reference, not the location of the variable value.
            # Also see https://github.com/databricks/cli/issues/1330.
            python_file: ${var.file_path}
+     # Include a job cluster for completeness
+     job_clusters:
+       - job_cluster_key: default
+         new_cluster:
+           spark_version: 15.4.x-scala2.12

View File

@ -0,0 +1,4 @@
trace $CLI bundle validate -t default -o json | \
jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.default.json
trace $CLI bundle validate -t override -o json | \
jq '{ paths: [.resources.jobs.job.tasks[].spark_python_task.python_file] }' > output.override.json

View File

@ -0,0 +1,6 @@
{
"project_name": "my_dbt_sql",
"http_path": "/sql/2.0/warehouses/f00dcafe",
"default_catalog": "main",
"personal_schemas": "yes, use a schema based on the current user name during development"
}

View File

@ -0,0 +1,32 @@
>>> $CLI bundle init dbt-sql --config-file ./input.json --output-dir output
Welcome to the dbt template for Databricks Asset Bundles!
A workspace was selected based on your current profile. For information about how to change this, see https://docs.databricks.com/dev-tools/cli/profiles.html.
workspace_host: $DATABRICKS_URL
📊 Your new project has been created in the 'my_dbt_sql' directory!
If you already have dbt installed, just type 'cd my_dbt_sql; dbt init' to get started.
Refer to the README.md file for full "getting started" guide and production setup instructions.
>>> $CLI bundle validate -t dev
Name: my_dbt_sql
Target: dev
Workspace:
Host: $DATABRICKS_URL
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/dev
Validation OK!
>>> $CLI bundle validate -t prod
Name: my_dbt_sql
Target: prod
Workspace:
Host: $DATABRICKS_URL
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/my_dbt_sql/prod
Validation OK!

View File

@ -0,0 +1,2 @@
.databricks

View File

@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *

View File

@ -0,0 +1,6 @@
{
"recommendations": [
"redhat.vscode-yaml",
"innoverio.vscode-dbt-power-user",
]
}

View File

@ -0,0 +1,32 @@
{
"python.analysis.stubPath": ".vscode",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.extraPaths": ["src"],
"files.exclude": {
"**/*.egg-info": true,
"**/__pycache__": true,
".pytest_cache": true,
},
"python.envFile": "${workspaceFolder}/.databricks/.databricks.env",
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
"sqltools.connections": [
{
"connectionMethod": "VS Code Extension (beta)",
"catalog": "hive_metastore",
"previewLimit": 50,
"driver": "Databricks",
"name": "databricks",
"path": "/sql/2.0/warehouses/f00dcafe"
}
],
"sqltools.autoConnectTo": "",
"[jinja-sql]": {
"editor.defaultFormatter": "innoverio.vscode-dbt-power-user"
}
}

View File

@ -0,0 +1,138 @@
# my_dbt_sql
The 'my_dbt_sql' project was generated by using the dbt template for
Databricks Asset Bundles. It follows the standard dbt project structure
and has an additional `resources` directory to define Databricks resources such as jobs
that run dbt models.
* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects.
* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html
The remainder of this file includes instructions for local development (using dbt)
and deployment to production (using Databricks Asset Bundles).
## Development setup
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
2. Authenticate to your Databricks workspace, if you have not done so already:
```
$ databricks configure
```
3. Install dbt
To install dbt, you need a recent version of Python. For the instructions below,
we assume `python3` refers to the Python version you want to use. On some systems,
you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`.
Run these instructions from the `my_dbt_sql` directory. We recommend making
use of a Python virtual environment and installing dbt as follows:
```
$ python3 -m venv .venv
$ . .venv/bin/activate
$ pip install -r requirements-dev.txt
```
4. Initialize your dbt profile
Use `dbt init` to initialize your profile.
```
$ dbt init
```
Note that dbt authentication uses personal access tokens by default
(see https://docs.databricks.com/dev-tools/auth/pat.html).
You can use OAuth as an alternative, but this currently requires manual configuration.
See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md
for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605
for advice on setting up OAuth for Azure Databricks.
To set up additional profiles, such as a 'prod' profile,
see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles.
5. Activate dbt so it can be used from the terminal
```
$ . .venv/bin/activate
```
## Local development with dbt
Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html):
```
$ dbt seed
$ dbt run
```
(Did you get an error that the dbt command could not be found? You may need
to try the last step from the development setup above to re-activate
your Python virtual environment!)
To just evaluate a single model defined in a file called orders.sql, use:
```
$ dbt run --model orders
```
Use `dbt test` to run tests generated from yml files such as `models/schema.yml`
and any SQL tests from `tests/`
```
$ dbt test
```
## Production setup
Your production dbt profiles are defined in dbt_profiles/profiles.yml.
These profiles define the default catalog, schema, and any other
target-specific settings. Read more about dbt profiles on Databricks at
https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile.
The target workspaces for staging and prod are defined in databricks.yml.
You can manually deploy based on these configurations (see below).
Or you can use CI/CD to automate deployment. See
https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation
on CI/CD setup.
## Manually deploying to Databricks with Databricks Asset Bundles
Databricks Asset Bundles can be used to deploy to Databricks and to execute
dbt commands as a job using Databricks Workflows. See
https://docs.databricks.com/dev-tools/bundles/index.html to learn more.
Use the Databricks CLI to deploy a development copy of this project to a workspace:
```
$ databricks bundle deploy --target dev
```
(Note that "dev" is the default target, so the `--target` parameter
is optional here.)
This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] my_dbt_sql_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.
You can also deploy to your production target directly from the command-line.
The warehouse, catalog, and schema for that target are configured in databricks.yml.
When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml
has a schedule set that runs every day. The schedule is paused when deploying in development mode
(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
To deploy a production copy, type:
```
$ databricks bundle deploy --target prod
```
## IDE support
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions
related to dbt may further enhance your dbt development experience!

View File

@ -0,0 +1,34 @@
# This file defines the structure of this project and how it is deployed
# to production using Databricks Asset Bundles.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: my_dbt_sql
uuid: <UUID>
include:
- resources/*.yml
# Deployment targets.
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml
targets:
dev:
default: true
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
workspace:
host: $DATABRICKS_URL
prod:
mode: production
workspace:
host: $DATABRICKS_URL
# We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy.
root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: $USERNAME
level: CAN_MANAGE
run_as:
user_name: $USERNAME

View File

@ -0,0 +1,38 @@
# This file defines dbt profiles for deployed dbt jobs.
my_dbt_sql:
target: dev # default target
outputs:
# Doing local development with the dbt CLI?
# Then you should create your own profile in your .dbt/profiles.yml using 'dbt init'
# (See README.md)
# The default target when deployed with the Databricks CLI
# N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml
dev:
type: databricks
method: http
catalog: main
schema: "{{ var('dev_schema') }}"
http_path: /sql/2.0/warehouses/f00dcafe
# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'dev'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"
# The production target when deployed with the Databricks CLI
prod:
type: databricks
method: http
catalog: main
schema: default
http_path: /sql/2.0/warehouses/f00dcafe
# The workspace host / token are provided by Databricks
# see databricks.yml for the workspace host used for 'prod'
host: "{{ env_var('DBT_HOST') }}"
token: "{{ env_var('DBT_ACCESS_TOKEN') }}"

View File

@ -0,0 +1,32 @@
name: 'my_dbt_sql'
version: '1.0.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: 'my_dbt_sql'
# These configurations specify where dbt should look for different types of files.
# For Databricks asset bundles, we put everything in src, as you may have
# non-dbt resources in your project.
model-paths: ["src/models"]
analysis-paths: ["src/analyses"]
test-paths: ["src/tests"]
seed-paths: ["src/seeds"]
macro-paths: ["src/macros"]
snapshot-paths: ["src/snapshots"]
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
# In this example config, we tell dbt to build all models in the example/
# directory as views by default. These settings can be overridden in the
# individual model files using the `{{ config(...) }}` macro.
models:
my_dbt_sql:
# Config indicated by + and applies to all files under models/example/
example:
+materialized: view

Some files were not shown because too many files have changed in this diff.