Merge remote-tracking branch 'origin/main' into populate-sources

This commit is contained in:
Pieter Noordhuis 2025-01-27 15:06:00 +01:00
commit 61d8848177
No known key found for this signature in database
GPG Key ID: 12ACCCC104CF2930
183 changed files with 5199 additions and 1298 deletions

View File

@ -1 +1 @@
779817ed8d63031f5ea761fbd25ee84f38feec0d
0be1b914249781b5e903b7676fd02255755bc851

View File

@ -109,16 +109,19 @@ var {{.CamelName}}Overrides []func(
{{- end }}
)
{{- $excludeFromJson := list "http-request"}}
func new{{.PascalName}}() *cobra.Command {
cmd := &cobra.Command{}
{{- $canUseJson := and .CanUseJson (not (in $excludeFromJson .KebabName )) -}}
{{- if .Request}}
var {{.CamelName}}Req {{.Service.Package.Name}}.{{.Request.PascalName}}
{{- if .RequestBodyField }}
{{.CamelName}}Req.{{.RequestBodyField.PascalName}} = &{{.Service.Package.Name}}.{{.RequestBodyField.Entity.PascalName}}{}
{{- end }}
{{- if .CanUseJson}}
{{- if $canUseJson}}
var {{.CamelName}}Json flags.JsonFlag
{{- end}}
{{- end}}
@ -135,7 +138,7 @@ func new{{.PascalName}}() *cobra.Command {
{{- $request = .RequestBodyField.Entity -}}
{{- end -}}
{{if $request }}// TODO: short flags
{{- if .CanUseJson}}
{{- if $canUseJson}}
cmd.Flags().Var(&{{.CamelName}}Json, "json", `either inline JSON string or @path/to/file.json with request body`)
{{- end}}
{{$method := .}}
@ -177,7 +180,7 @@ func new{{.PascalName}}() *cobra.Command {
{{- $hasRequiredArgs := and (not $hasIdPrompt) $hasPosArgs -}}
{{- $hasSingleRequiredRequestBodyFieldWithPrompt := and (and $hasIdPrompt $request) (eq 1 (len $request.RequiredRequestBodyFields)) -}}
{{- $onlyPathArgsRequiredAsPositionalArguments := and $request (eq (len .RequiredPositionalArguments) (len $request.RequiredPathFields)) -}}
{{- $hasDifferentArgsWithJsonFlag := and (not $onlyPathArgsRequiredAsPositionalArguments) (and .CanUseJson (or $request.HasRequiredRequestBodyFields )) -}}
{{- $hasDifferentArgsWithJsonFlag := and (not $onlyPathArgsRequiredAsPositionalArguments) (and $canUseJson (or $request.HasRequiredRequestBodyFields )) -}}
{{- $hasCustomArgHandler := or $hasRequiredArgs $hasDifferentArgsWithJsonFlag -}}
{{- $atleastOneArgumentWithDescription := false -}}
@ -239,7 +242,7 @@ func new{{.PascalName}}() *cobra.Command {
ctx := cmd.Context()
{{if .Service.IsAccounts}}a := root.AccountClient(ctx){{else}}w := root.WorkspaceClient(ctx){{end}}
{{- if .Request }}
{{ if .CanUseJson }}
{{ if $canUseJson }}
if cmd.Flags().Changed("json") {
diags := {{.CamelName}}Json.Unmarshal(&{{.CamelName}}Req{{ if .RequestBodyField }}.{{.RequestBodyField.PascalName}}{{ end }})
if diags.HasError() {
@ -255,7 +258,7 @@ func new{{.PascalName}}() *cobra.Command {
return fmt.Errorf("please provide command input in JSON format by specifying the --json flag")
}{{- end}}
{{- if $hasPosArgs }}
{{- if and .CanUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} else {
{{- if and $canUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }} else {
{{- end}}
{{- if $hasIdPrompt}}
if len(args) == 0 {
@ -279,9 +282,9 @@ func new{{.PascalName}}() *cobra.Command {
{{$method := .}}
{{- range $arg, $field := .RequiredPositionalArguments}}
{{- template "args-scan" (dict "Arg" $arg "Field" $field "Method" $method "HasIdPrompt" $hasIdPrompt)}}
{{- template "args-scan" (dict "Arg" $arg "Field" $field "Method" $method "HasIdPrompt" $hasIdPrompt "ExcludeFromJson" $excludeFromJson)}}
{{- end -}}
{{- if and .CanUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }}
{{- if and $canUseJson $hasSingleRequiredRequestBodyFieldWithPrompt }}
}
{{- end}}
@ -392,7 +395,8 @@ func new{{.PascalName}}() *cobra.Command {
{{- $method := .Method -}}
{{- $arg := .Arg -}}
{{- $hasIdPrompt := .HasIdPrompt -}}
{{- $optionalIfJsonIsUsed := and (not $hasIdPrompt) (and $field.IsRequestBodyField $method.CanUseJson) }}
{{ $canUseJson := and $method.CanUseJson (not (in .ExcludeFromJson $method.KebabName)) }}
{{- $optionalIfJsonIsUsed := and (not $hasIdPrompt) (and $field.IsRequestBodyField $canUseJson) }}
{{- if $optionalIfJsonIsUsed }}
if !cmd.Flags().Changed("json") {
{{- end }}

1
.gitattributes vendored
View File

@ -31,6 +31,7 @@ cmd/account/users/users.go linguist-generated=true
cmd/account/vpc-endpoints/vpc-endpoints.go linguist-generated=true
cmd/account/workspace-assignment/workspace-assignment.go linguist-generated=true
cmd/account/workspaces/workspaces.go linguist-generated=true
cmd/workspace/access-control/access-control.go linguist-generated=true
cmd/workspace/aibi-dashboard-embedding-access-policy/aibi-dashboard-embedding-access-policy.go linguist-generated=true
cmd/workspace/aibi-dashboard-embedding-approved-domains/aibi-dashboard-embedding-approved-domains.go linguist-generated=true
cmd/workspace/alerts-legacy/alerts-legacy.go linguist-generated=true

View File

@ -10,19 +10,65 @@ on:
jobs:
publish-to-winget-pkgs:
runs-on:
group: databricks-protected-runner-group
labels: windows-server-latest
group: databricks-deco-testing-runner-group
labels: ubuntu-latest-deco
environment: release
steps:
- uses: vedantmgoyal2009/winget-releaser@93fd8b606a1672ec3e5c6c3bb19426be68d1a8b0 # v2
with:
identifier: Databricks.DatabricksCLI
installers-regex: 'windows_.*-signed\.zip$' # Only signed Windows releases
token: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }}
fork-user: eng-dev-ecosystem-bot
- name: Checkout repository and submodules
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# Use the tag from the input, or the ref name if the input is not provided.
# The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command.
release-tag: ${{ inputs.tag || github.ref_name }}
# When updating the version of komac, make sure to update the checksum in the next step.
# Find both at https://github.com/russellbanks/Komac/releases.
- name: Download komac binary
run: |
curl -s -L -o $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz https://github.com/russellbanks/Komac/releases/download/v2.9.0/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz
- name: Verify komac binary
run: |
echo "d07a12831ad5418fee715488542a98ce3c0e591d05c850dd149fe78432be8c4c $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz" | sha256sum -c -
- name: Untar komac binary to temporary path
run: |
mkdir -p $RUNNER_TEMP/komac
tar -xzf $RUNNER_TEMP/komac-2.9.0-x86_64-unknown-linux-gnu.tar.gz -C $RUNNER_TEMP/komac
- name: Add komac to PATH
run: echo "$RUNNER_TEMP/komac" >> $GITHUB_PATH
- name: Confirm komac version
run: komac --version
# Use the tag from the input, or the ref name if the input is not provided.
# The ref name is equal to the tag name when this workflow is triggered by the "sign-cli" command.
- name: Strip "v" prefix from version
id: strip_version
run: echo "version=$(echo ${{ inputs.tag || github.ref_name }} | sed 's/^v//')" >> "$GITHUB_OUTPUT"
- name: Get URLs of signed Windows binaries
id: get_windows_urls
run: |
urls=$(
gh api https://api.github.com/repos/databricks/cli/releases/tags/${{ inputs.tag || github.ref_name }} | \
jq -r .assets[].browser_download_url | \
grep -E '_windows_.*-signed\.zip$' | \
tr '\n' ' '
)
if [ -z "$urls" ]; then
echo "No signed Windows binaries found" >&2
exit 1
fi
echo "urls=$urls" >> "$GITHUB_OUTPUT"
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Publish to Winget
run: |
komac update Databricks.DatabricksCLI \
--version ${{ steps.strip_version.outputs.version }} \
--submit \
--urls ${{ steps.get_windows_urls.outputs.urls }} \
env:
KOMAC_FORK_OWNER: eng-dev-ecosystem-bot
GITHUB_TOKEN: ${{ secrets.ENG_DEV_ECOSYSTEM_BOT_TOKEN }}

View File

@ -60,12 +60,6 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0
- name: Run ruff
uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1
with:
version: "0.9.1"
args: "format --check"
- name: Set go env
run: |
echo "GOPATH=$(go env GOPATH)" >> $GITHUB_ENV
@ -77,10 +71,10 @@ jobs:
make vendor
pip3 install wheel
- name: Run tests
run: make test
- name: Run tests with coverage
run: make cover
golangci:
linters:
needs: cleanups
name: lint
runs-on: ubuntu-latest
@ -105,6 +99,11 @@ jobs:
with:
version: v1.63.4
args: --timeout=15m
- name: Run ruff
uses: astral-sh/ruff-action@31a518504640beb4897d0b9f9e50a2a9196e75ba # v3.0.1
with:
version: "0.9.1"
args: "format --check"
validate-bundle-schema:
needs: cleanups

View File

@ -1,5 +1,25 @@
# Version changelog
## [Release] Release v0.239.1
CLI:
* Added text output templates for apps list and list-deployments ([#2175](https://github.com/databricks/cli/pull/2175)).
* Fix duplicate "apps" entry in help output ([#2191](https://github.com/databricks/cli/pull/2191)).
Bundles:
* Allow yaml-anchors in schema ([#2200](https://github.com/databricks/cli/pull/2200)).
* Show an error when non-yaml files used in include section ([#2201](https://github.com/databricks/cli/pull/2201)).
* Set WorktreeRoot to sync root outside git repo ([#2197](https://github.com/databricks/cli/pull/2197)).
* fix: Detailed message for using source-linked deployment with file_path specified ([#2119](https://github.com/databricks/cli/pull/2119)).
* Allow using variables in enum fields ([#2199](https://github.com/databricks/cli/pull/2199)).
* Add experimental-jobs-as-code template ([#2177](https://github.com/databricks/cli/pull/2177)).
* Reading variables from file ([#2171](https://github.com/databricks/cli/pull/2171)).
* Fixed an apps message order and added output test ([#2174](https://github.com/databricks/cli/pull/2174)).
* Default to forward slash-separated paths for path translation ([#2145](https://github.com/databricks/cli/pull/2145)).
* Include a materialized copy of built-in templates ([#2146](https://github.com/databricks/cli/pull/2146)).
## [Release] Release v0.239.0
### New feature announcement

View File

@ -3,6 +3,8 @@ default: vendor fmt lint
PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... .
GOTESTSUM_FORMAT ?= pkgname-and-test-fails
GOTESTSUM_CMD ?= gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped
lint:
golangci-lint run --fix
@ -18,22 +20,19 @@ fmt:
golangci-lint run --enable-only="gofmt,gofumpt,goimports" --fix ./...
test:
gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- ${PACKAGES}
${GOTESTSUM_CMD} -- ${PACKAGES}
cover:
gotestsum --format ${GOTESTSUM_FORMAT} --no-summary=skipped -- -coverprofile=coverage.txt ${PACKAGES}
showcover:
go tool cover -html=coverage.txt
acc-cover:
rm -fr ./acceptance/build/cover/
CLI_GOCOVERDIR=build/cover go test ./acceptance
CLI_GOCOVERDIR=build/cover ${GOTESTSUM_CMD} -- -coverprofile=coverage.txt ${PACKAGES}
rm -fr ./acceptance/build/cover-merged/
mkdir -p acceptance/build/cover-merged/
go tool covdata merge -i $$(printf '%s,' acceptance/build/cover/* | sed 's/,$$//') -o acceptance/build/cover-merged/
go tool covdata textfmt -i acceptance/build/cover-merged -o coverage-acceptance.txt
showcover:
go tool cover -html=coverage.txt
acc-showcover:
go tool cover -html=coverage-acceptance.txt

4
NOTICE
View File

@ -105,3 +105,7 @@ License - https://github.com/wI2L/jsondiff/blob/master/LICENSE
https://github.com/hexops/gotextdiff
Copyright (c) 2009 The Go Authors. All rights reserved.
License - https://github.com/hexops/gotextdiff/blob/main/LICENSE
https://github.com/BurntSushi/toml
Copyright (c) 2013 TOML authors
https://github.com/BurntSushi/toml/blob/master/COPYING

View File

@ -17,3 +17,5 @@ For more complex tests one can also use:
- `errcode` helper: if the command fails with non-zero code, it appends `Exit code: N` to the output but returns success to caller (bash), allowing continuation of script.
- `trace` helper: prints the arguments before executing the command.
- custom output files: redirect output to custom file (it must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`.
See [selftest](./selftest) for a toy test.

View File

@ -3,6 +3,7 @@ package acceptance_test
import (
"context"
"errors"
"flag"
"fmt"
"io"
"os"
@ -22,7 +23,22 @@ import (
"github.com/stretchr/testify/require"
)
var KeepTmp = os.Getenv("KEEP_TMP") != ""
var KeepTmp bool
// To debug the CLI running under an acceptance test, set this to the full subtest name, e.g. "bundle/variables/empty".
// Then set your breakpoints and click "debug test" next to TestAccept in VS Code.
// example: var SingleTest = "bundle/variables/empty"
var SingleTest = ""
// If enabled, instead of compiling and running the CLI externally, we start an in-process server that accepts and runs
// CLI commands. The $CLI in test scripts is a helper that just forwards command-line arguments to this server (see bin/callserver.py).
// This also disables parallelism in tests.
var InprocessMode bool
func init() {
flag.BoolVar(&InprocessMode, "inprocess", SingleTest != "", "Run CLI in the same process as test (for debugging)")
flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run")
}
const (
EntryPointScript = "script"
@ -37,6 +53,18 @@ var Scripts = map[string]bool{
}
// TestAccept runs the acceptance tests, taking the execution mode from InprocessMode
// and the optional single-test filter from SingleTest.
func TestAccept(t *testing.T) {
testAccept(t, InprocessMode, SingleTest)
}
// TestInprocessMode runs the "selftest" acceptance test in in-process mode and
// requires that exactly one test was selected. It is skipped when InprocessMode is
// already set.
func TestInprocessMode(t *testing.T) {
	if !InprocessMode {
		selected := testAccept(t, true, "selftest")
		require.Equal(t, 1, selected)
		return
	}
	t.Skip("Already tested by TestAccept")
}
func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
repls := testdiff.ReplacementsContext{}
cwd, err := os.Getwd()
require.NoError(t, err)
@ -49,18 +77,24 @@ func TestAccept(t *testing.T) {
t.Logf("Writing coverage to %s", coverDir)
}
execPath := BuildCLI(t, cwd, coverDir)
// $CLI is what test scripts are using
execPath := ""
if InprocessMode {
cmdServer := StartCmdServer(t)
t.Setenv("CMD_SERVER_URL", cmdServer.URL)
execPath = filepath.Join(cwd, "bin", "callserver.py")
} else {
execPath = BuildCLI(t, cwd, coverDir)
}
t.Setenv("CLI", execPath)
repls.SetPath(execPath, "$CLI")
// Make helper scripts available
t.Setenv("PATH", fmt.Sprintf("%s%c%s", filepath.Join(cwd, "bin"), os.PathListSeparator, os.Getenv("PATH")))
repls := testdiff.ReplacementsContext{}
repls.Set(execPath, "$CLI")
tempHomeDir := t.TempDir()
repls.Set(tempHomeDir, "$TMPHOME")
repls.SetPath(tempHomeDir, "$TMPHOME")
t.Logf("$TMPHOME=%v", tempHomeDir)
// Prevent CLI from downloading terraform in each test:
@ -94,13 +128,25 @@ func TestAccept(t *testing.T) {
testDirs := getTests(t)
require.NotEmpty(t, testDirs)
if singleTest != "" {
testDirs = slices.DeleteFunc(testDirs, func(n string) bool {
return n != singleTest
})
require.NotEmpty(t, testDirs, "singleTest=%#v did not match any tests\n%#v", singleTest, testDirs)
}
for _, dir := range testDirs {
testName := strings.ReplaceAll(dir, "\\", "/")
t.Run(testName, func(t *testing.T) {
t.Parallel()
runTest(t, dir, coverDir, repls)
if !InprocessMode {
t.Parallel()
}
runTest(t, dir, coverDir, repls.Clone())
})
}
return len(testDirs)
}
func getTests(t *testing.T) []string {
@ -124,6 +170,13 @@ func getTests(t *testing.T) []string {
}
func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsContext) {
config, configPath := LoadConfig(t, dir)
isEnabled, isPresent := config.GOOS[runtime.GOOS]
if isPresent && !isEnabled {
t.Skipf("Disabled via GOOS.%s setting in %s", runtime.GOOS, configPath)
}
var tmpDir string
var err error
if KeepTmp {
@ -136,6 +189,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
tmpDir = t.TempDir()
}
repls.SetPathWithParents(tmpDir, "$TMPDIR")
repls.Repls = append(repls.Repls, config.Repls...)
scriptContents := readMergedScriptContents(t, dir)
testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents)
@ -168,14 +224,15 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
formatOutput(out, err)
require.NoError(t, out.Close())
printedRepls := false
// Compare expected outputs
for relPath := range outputs {
doComparison(t, repls, dir, tmpDir, relPath)
doComparison(t, repls, dir, tmpDir, relPath, &printedRepls)
}
// Make sure there are no unaccounted-for new files
files, err := ListDir(t, tmpDir)
require.NoError(t, err)
files := ListDir(t, tmpDir)
for _, relPath := range files {
if _, ok := inputs[relPath]; ok {
continue
@ -186,12 +243,12 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
if strings.HasPrefix(relPath, "out") {
// We have a new file starting with "out"
// Show the contents & support overwrite mode for it:
doComparison(t, repls, dir, tmpDir, relPath)
doComparison(t, repls, dir, tmpDir, relPath, &printedRepls)
}
}
}
func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string) {
func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string, printedRepls *bool) {
pathRef := filepath.Join(dirRef, relPath)
pathNew := filepath.Join(dirNew, relPath)
bufRef, okRef := readIfExists(t, pathRef)
@ -236,6 +293,15 @@ func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirN
t.Logf("Overwriting existing output file: %s", relPath)
testutil.WriteFile(t, pathRef, valueNew)
}
if !equal && printedRepls != nil && !*printedRepls {
*printedRepls = true
var items []string
for _, item := range repls.Repls {
items = append(items, fmt.Sprintf("REPL %s => %s", item.Old, item.New))
}
t.Log("Available replacements:\n" + strings.Join(items, "\n"))
}
}
// Returns combined script.prepare (root) + script.prepare (parent) + ... + script + ... + script.cleanup (parent) + ...
@ -392,11 +458,15 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
})
}
func ListDir(t *testing.T, src string) ([]string, error) {
func ListDir(t *testing.T, src string) []string {
var files []string
err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
// Do not FailNow here.
// The output comparison happens after this call and covers output.txt, which contains the
// errors printed by commands, including an explanation of why a given file cannot be read.
t.Errorf("Error when listing %s: path=%s: %s", src, path, err)
return nil
}
if info.IsDir() {
@ -411,5 +481,8 @@ func ListDir(t *testing.T, src string) ([]string, error) {
files = append(files, relPath)
return nil
})
return files, err
if err != nil {
t.Errorf("Failed to list %s: %s", src, err)
}
return files
}

31
acceptance/bin/callserver.py Executable file
View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3
"""Forward a command-line invocation to the command server at $CMD_SERVER_URL.

The arguments, current working directory and environment are sent as query
parameters. The JSON response's "stdout" and "stderr" are written to this
process's streams, and its "exitcode" becomes this process's exit code.
"""
import sys
import os
import json
import urllib.request
from urllib.parse import urlencode

# Collect the environment to forward, skipping oversized values so that the
# resulting URL stays within the size limit checked below.
env = {}
for key, value in os.environ.items():
    if len(value) > 10_000:
        sys.stderr.write(f"Dropping key={key} value len={len(value)}\n")
        continue
    env[key] = value

q = {
    "args": " ".join(sys.argv[1:]),
    "cwd": os.getcwd(),
    "env": json.dumps(env),
}

url = os.environ["CMD_SERVER_URL"] + "/?" + urlencode(q)
if len(url) > 100_000:
    sys.exit("url too large")

# Use the response as a context manager so the connection is closed
# deterministically once the body has been read.
with urllib.request.urlopen(url) as resp:
    assert resp.status == 200, (resp.status, resp.url, resp.headers)
    result = json.load(resp)

# Replay the captured output and exit status of the command.
sys.stderr.write(result["stderr"])
sys.stdout.write(result["stdout"])
exitcode = int(result["exitcode"])
sys.exit(exitcode)

View File

@ -0,0 +1,2 @@
bundle:
name: git-permerror

View File

@ -0,0 +1,78 @@
=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case.
>>> chmod 000 .git
>>> $CLI bundle validate
Error: unable to load repository specific gitconfig: open config: permission denied
Name: git-permerror
Target: default
Workspace:
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/git-permerror/default
Found 1 error
Exit code: 1
>>> $CLI bundle validate -o json
Error: unable to load repository specific gitconfig: open config: permission denied
Exit code: 1
{
"bundle_root_path": ".",
"inferred": true
}
>>> withdir subdir/a/b $CLI bundle validate -o json
Error: unable to load repository specific gitconfig: open config: permission denied
Exit code: 1
{
"bundle_root_path": ".",
"inferred": true
}
=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. bundle_root_path is not correct in subdir case.
>>> chmod 000 .git/HEAD
>>> $CLI bundle validate -o json
{
"bundle_root_path": ".",
"inferred": true
}
>>> withdir subdir/a/b $CLI bundle validate -o json
{
"bundle_root_path": ".",
"inferred": true
}
=== No permissions to read .git/config. Badness: inferred is incorretly set to true. bundle_root_path is not correct is subdir case.
>>> chmod 000 .git/config
>>> $CLI bundle validate -o json
Error: unable to load repository specific gitconfig: open config: permission denied
Exit code: 1
{
"bundle_root_path": ".",
"inferred": true
}
>>> withdir subdir/a/b $CLI bundle validate -o json
Error: unable to load repository specific gitconfig: open config: permission denied
Exit code: 1
{
"bundle_root_path": ".",
"inferred": true
}

View File

@ -0,0 +1,25 @@
mkdir myrepo
cd myrepo
cp ../databricks.yml .
git-repo-init
mkdir -p subdir/a/b
printf "=== No permission to access .git. Badness: inferred flag is set to true even though we did not infer branch. bundle_root_path is not correct in subdir case.\n"
trace chmod 000 .git
errcode trace $CLI bundle validate
errcode trace $CLI bundle validate -o json | jq .bundle.git
errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git
printf "\n\n=== No permissions to read .git/HEAD. Badness: warning is not shown. inferred is incorrectly set to true. bundle_root_path is not correct in subdir case.\n"
chmod 700 .git
trace chmod 000 .git/HEAD
errcode trace $CLI bundle validate -o json | jq .bundle.git
errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git
printf "\n\n=== No permissions to read .git/config. Badness: inferred is incorretly set to true. bundle_root_path is not correct is subdir case.\n"
chmod 666 .git/HEAD
trace chmod 000 .git/config
errcode trace $CLI bundle validate -o json | jq .bundle.git
errcode trace withdir subdir/a/b $CLI bundle validate -o json | jq .bundle.git
rm -fr .git

View File

@ -0,0 +1,5 @@
Badness = "Warning logs not shown; inferred flag is set to true incorrectly; bundle_root_path is not correct"
[GOOS]
# This test relies on chmod which does not work on Windows
windows = false

View File

@ -0,0 +1,6 @@
bundle:
name: non_yaml_in_includes
include:
- test.py
- resources/*.yml

View File

@ -0,0 +1,10 @@
Error: Files in the 'include' configuration section must be YAML files.
in databricks.yml:5:4
The file test.py in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead.
Name: non_yaml_in_includes
Found 1 error
Exit code: 1

View File

@ -0,0 +1 @@
$CLI bundle validate

View File

@ -0,0 +1 @@
print("Hello world")

View File

@ -1,8 +1,6 @@
>>> $CLI bundle validate -t development -o json
Exit code: 0
>>> $CLI bundle validate -t error
Error: notebook this value is overridden not found. Local notebook references are expected
to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb]

View File

@ -1,8 +1,6 @@
>>> $CLI bundle validate -t development -o json
Exit code: 0
>>> $CLI bundle validate -t error
Error: notebook this value is overridden not found. Local notebook references are expected
to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb]

View File

@ -0,0 +1,73 @@
>>> $CLI bundle validate -o json -t development
{
"mode": "development",
"quality_monitors": {
"my_monitor": {
"assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1",
"inference_log": {
"granularities": [
"1 day"
],
"model_id_col": "model_id",
"prediction_col": "prediction",
"problem_type": "PROBLEM_TYPE_REGRESSION",
"timestamp_col": "timestamp"
},
"output_schema_name": "main.dev",
"schedule": null,
"table_name": "main.test.dev"
}
}
}
>>> $CLI bundle validate -o json -t staging
{
"mode": null,
"quality_monitors": {
"my_monitor": {
"assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1",
"inference_log": {
"granularities": [
"1 day"
],
"model_id_col": "model_id",
"prediction_col": "prediction",
"problem_type": "PROBLEM_TYPE_REGRESSION",
"timestamp_col": "timestamp"
},
"output_schema_name": "main.staging",
"schedule": {
"quartz_cron_expression": "0 0 12 * * ?",
"timezone_id": "UTC"
},
"table_name": "main.test.staging"
}
}
}
>>> $CLI bundle validate -o json -t production
{
"mode": null,
"quality_monitors": {
"my_monitor": {
"assets_dir": "/Shared/provider-test/databricks_monitoring/main.test.thing1",
"inference_log": {
"granularities": [
"1 day",
"1 hour"
],
"model_id_col": "model_id_prod",
"prediction_col": "prediction_prod",
"problem_type": "PROBLEM_TYPE_REGRESSION",
"timestamp_col": "timestamp_prod"
},
"output_schema_name": "main.prod",
"schedule": {
"quartz_cron_expression": "0 0 12 * * ?",
"timezone_id": "UTC"
},
"table_name": "main.test.prod"
}
}
}

View File

@ -0,0 +1,3 @@
trace $CLI bundle validate -o json -t development | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }'
trace $CLI bundle validate -o json -t staging | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }'
trace $CLI bundle validate -o json -t production | jq '{ mode: .bundle.mode, quality_monitors: .resources.quality_monitors }'

View File

@ -0,0 +1,5 @@
bundle:
name: test-bundle
sync:
paths:
- ..

View File

@ -0,0 +1,11 @@
Error: path "$TMPDIR" is not within repository root "$TMPDIR/myrepo"
Name: test-bundle
Target: default
Workspace:
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default
Found 1 error
Exit code: 1

View File

@ -0,0 +1,6 @@
# This should error, we do not allow syncroot outside of git repo.
mkdir myrepo
cd myrepo
cp ../databricks.yml .
git-repo-init
$CLI bundle validate | sed 's/\\\\/\//g'

View File

@ -0,0 +1,5 @@
bundle:
name: test-bundle
sync:
paths:
- ..

View File

@ -0,0 +1,7 @@
Name: test-bundle
Target: default
Workspace:
User: $USERNAME
Path: /Workspace/Users/$USERNAME/.bundle/test-bundle/default
Validation OK!

View File

@ -0,0 +1,2 @@
# This should not error, syncroot can be outside bundle root.
$CLI bundle validate

View File

@ -0,0 +1,5 @@
{
"project_name": "my_jobs_as_code",
"include_notebook": "yes",
"include_python": "yes"
}

View File

@ -0,0 +1,85 @@
>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output
Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL
✨ Your new project has been created in the 'my_jobs_as_code' directory!
Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.
>>> $CLI bundle validate -t dev --output json
{
"jobs": {
"my_jobs_as_code_job": {
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"email_notifications": {
"on_failure": [
"$USERNAME"
]
},
"format": "MULTI_TASK",
"job_clusters": [
{
"job_cluster_key": "job_cluster",
"new_cluster": {
"autoscale": {
"max_workers": 4,
"min_workers": 1
},
"node_type_id": "i3.xlarge",
"spark_version": "15.4.x-scala2.12"
}
}
],
"max_concurrent_runs": 4,
"name": "[dev $USERNAME] my_jobs_as_code_job",
"permissions": [],
"queue": {
"enabled": true
},
"tags": {
"dev": "$USERNAME"
},
"tasks": [
{
"job_cluster_key": "job_cluster",
"notebook_task": {
"notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook"
},
"task_key": "notebook_task"
},
{
"depends_on": [
{
"task_key": "notebook_task"
}
],
"job_cluster_key": "job_cluster",
"libraries": [
{
"whl": "dist/*.whl"
}
],
"python_wheel_task": {
"entry_point": "main",
"package_name": "my_jobs_as_code"
},
"task_key": "main_task"
}
],
"trigger": {
"pause_status": "PAUSED",
"periodic": {
"interval": 1,
"unit": "DAYS"
}
}
}
}
}

View File

@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md

View File

@ -0,0 +1,58 @@
# my_jobs_as_code
The 'my_jobs_as_code' project was generated by using the "Jobs as code" template.
## Prerequisites
1. Install Databricks CLI 0.238 or later.
See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).
2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
We use uv to create a virtual environment and install the required dependencies.
3. Authenticate to your Databricks workspace if you have not done so already:
```
$ databricks configure
```
4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
**Databricks Connect** for instructions on running the included Python code from a different IDE.
5. For documentation on the Databricks Asset Bundles format used
for this project, and for CI/CD configuration, see
https://docs.databricks.com/dev-tools/bundles/index.html.
## Deploy and run jobs
1. Create a new virtual environment and install the required dependencies:
```
$ uv sync
```
2. To deploy the bundle to the development target:
```
$ databricks bundle deploy --target dev
```
*(Note that "dev" is the default target, so the `--target` parameter is optional here.)*
This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] my_jobs_as_code_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.
3. Similarly, to deploy a production copy, type:
```
$ databricks bundle deploy --target prod
```
Note that the default job from the template has a schedule that runs every day
(defined in resources/my_jobs_as_code_job.py). The schedule
is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes](
https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)).
4. To run a job:
```
$ databricks bundle run
```

View File

@ -0,0 +1,48 @@
# This is a Databricks asset bundle definition for my_jobs_as_code.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: my_jobs_as_code
uuid: <UUID>
experimental:
python:
# Activate virtual environment before loading resources defined in Python.
# If disabled, defaults to using the Python interpreter available in the current shell.
venv_path: .venv
# Functions called to load resources defined in Python. See resources/__init__.py
resources:
- "resources:load_resources"
artifacts:
default:
type: whl
path: .
# We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
# to ensure that changes to wheel package are picked up when used on all-purpose clusters
build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build
include:
- resources/*.yml
targets:
dev:
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: $DATABRICKS_URL
prod:
mode: production
workspace:
host: $DATABRICKS_URL
# We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy.
root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: $USERNAME
level: CAN_MANAGE
run_as:
user_name: $USERNAME

View File

@ -0,0 +1,22 @@
# Fixtures
This folder is reserved for fixtures, such as CSV files.
Below is an example of how to load fixtures as a data frame:
```
import pandas as pd
import os
def get_absolute_path(*relative_parts):
if 'dbutils' in globals():
base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
path = os.path.normpath(os.path.join(base_dir, *relative_parts))
return path if path.startswith("/Workspace") else "/Workspace" + path
else:
return os.path.join(*relative_parts)
csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```

View File

@ -0,0 +1,49 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "my_jobs_as_code"
requires-python = ">=3.10"
description = "wheel file based on my_jobs_as_code"
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
#
# Example:
# dependencies = [
# "requests==x.y.z",
# ]
dependencies = [
]
# see setup.py
dynamic = ["version"]
[project.entry-points.packages]
main = "my_jobs_as_code.main:main"
[tool.setuptools]
py-modules = ["resources", "my_jobs_as_code"]
[tool.uv]
## Dependencies for local development
dev-dependencies = [
"databricks-bundles==0.7.0",
## Add code completion support for DLT
# "databricks-dlt",
## databricks-connect can be used to run parts of this project locally.
## See https://docs.databricks.com/dev-tools/databricks-connect.html.
##
## Uncomment line below to install a version of db-connect that corresponds to
## the Databricks Runtime version used for this project.
# "databricks-connect>=15.4,<15.5",
]
override-dependencies = [
# pyspark package conflicts with 'databricks-connect'
"pyspark; sys_platform == 'never'",
]

View File

@ -0,0 +1,16 @@
from databricks.bundles.core import (
Bundle,
Resources,
load_resources_from_current_package_module,
)
def load_resources(bundle: Bundle) -> Resources:
"""
'load_resources' function is referenced in databricks.yml and is responsible for loading
bundle resources defined in Python code. This function is called by Databricks CLI during
bundle deployment. After deployment, this function is not used.
"""
# the default implementation loads all Python files in 'resources' directory
return load_resources_from_current_package_module()

View File

@ -0,0 +1,67 @@
from databricks.bundles.jobs import Job
"""
The main job for my_jobs_as_code.
"""
my_jobs_as_code_job = Job.from_dict(
{
"name": "my_jobs_as_code_job",
"trigger": {
# Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
"periodic": {
"interval": 1,
"unit": "DAYS",
},
},
"email_notifications": {
"on_failure": [
"$USERNAME",
],
},
"tasks": [
{
"task_key": "notebook_task",
"job_cluster_key": "job_cluster",
"notebook_task": {
"notebook_path": "src/notebook.ipynb",
},
},
{
"task_key": "main_task",
"depends_on": [
{
"task_key": "notebook_task",
},
],
"job_cluster_key": "job_cluster",
"python_wheel_task": {
"package_name": "my_jobs_as_code",
"entry_point": "main",
},
"libraries": [
# By default we just include the .whl file generated for the my_jobs_as_code package.
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
# for more information on how to add other libraries.
{
"whl": "dist/*.whl",
},
],
},
],
"job_clusters": [
{
"job_cluster_key": "job_cluster",
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"node_type_id": "i3.xlarge",
"autoscale": {
"min_workers": 1,
"max_workers": 4,
},
},
},
],
}
)

View File

@ -0,0 +1,4 @@
# scratch
This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.

View File

@ -0,0 +1,18 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_jobs_as_code project.
"""
import os
from setuptools import setup
local_version = os.getenv("LOCAL_VERSION")
version = "0.0.1"
setup(
version=f"{version}+{local_version}" if local_version else version,
)

View File

@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame
def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")
# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
try:
from databricks.connect import DatabricksSession
return DatabricksSession.builder.getOrCreate()
except ImportError:
return SparkSession.builder.getOrCreate()
def main():
get_taxis(get_spark()).show(5)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,75 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
},
"source": [
"# Default notebook\n",
"\n",
"This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "<UUID>",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"from my_jobs_as_code import main\n",
"\n",
"main.get_taxis(spark).show(10)"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "notebook",
"widgets": {}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,8 @@
from my_jobs_as_code.main import get_taxis, get_spark
# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml
def test_main():
taxis = get_taxis(get_spark())
assert taxis.count() > 5

View File

@ -0,0 +1,14 @@
trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output
cd output/my_jobs_as_code
# silence uv output because it's non-deterministic
uv sync 2> /dev/null
# remove version constraint because it always creates a warning on dev builds
cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new
mv databricks.yml.new databricks.yml
trace $CLI bundle validate -t dev --output json | jq ".resources"
rm -fr .venv resources/__pycache__ uv.lock my_jobs_as_code.egg-info

View File

@ -0,0 +1,3 @@
Error: not a bundle template: expected to find a template schema file at databricks_template_schema.json
Exit code: 1

View File

@ -0,0 +1,2 @@
export NO_COLOR=1
$CLI bundle init /DOES/NOT/EXIST

View File

@ -0,0 +1 @@
Badness = 'The error message should include full path: "expected to find a template schema file at databricks_template_schema.json"'

View File

@ -0,0 +1,5 @@
Error: git clone failed: git clone https://invalid-domain-123.databricks.com/hello/world $TMPDIR_GPARENT/world-123456 --no-tags --depth=1: exit status 128. Cloning into '$TMPDIR_GPARENT/world-123456'...
fatal: unable to access 'https://invalid-domain-123.databricks.com/hello/world/': Could not resolve host: invalid-domain-123.databricks.com
Exit code: 1

View File

@ -0,0 +1,2 @@
export NO_COLOR=1
$CLI bundle init https://invalid-domain-123.databricks.com/hello/world

View File

@ -0,0 +1,7 @@
[[Repls]]
Old = '\\'
New = '/'
[[Repls]]
Old = '/world-[0-9]+'
New = '/world-123456'

View File

@ -1,7 +1,5 @@
>>> errcode $CLI bundle validate --var a=one -o json
Exit code: 0
{
"a": {
"default": "hello",

View File

@ -1,4 +1,4 @@
Error: no value assigned to required variable a. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_a environment variable
Error: no value assigned to required variable a. Assignment can be done using "--var", by setting the BUNDLE_VAR_a environment variable, or in .databricks/bundle/<target>/variable-overrides.json file
Name: empty${var.a}
Target: default

View File

@ -9,7 +9,7 @@
"prod-a env-var-b"
>>> errcode $CLI bundle validate -t env-missing-a-required-variable-assignment
Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable
Error: no value assigned to required variable b. Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle/<target>/variable-overrides.json file
Name: test bundle
Target: env-missing-a-required-variable-assignment

View File

@ -0,0 +1,5 @@
{
"cluster_key": {
"node_type_id": "Standard_DS3_v2"
}
}

View File

@ -0,0 +1,7 @@
{
"cluster": {
"node_type_id": "Standard_DS3_v2"
},
"cluster_key": "mlops_stacks-cluster",
"cluster_workers": 2
}

View File

@ -0,0 +1,3 @@
{
"cluster": "mlops_stacks-cluster"
}

View File

@ -0,0 +1,3 @@
{
"cluster_key": "mlops_stacks-cluster-from-file"
}

View File

@ -0,0 +1,4 @@
{
"cluster_key": "mlops_stacks-cluster",
"cluster_workers": 2
}

View File

@ -0,0 +1 @@
!.databricks

View File

@ -0,0 +1,53 @@
bundle:
name: TestResolveVariablesFromFile
variables:
cluster:
type: "complex"
cluster_key:
cluster_workers:
resources:
jobs:
job1:
job_clusters:
- job_cluster_key: ${var.cluster_key}
new_cluster:
node_type_id: "${var.cluster.node_type_id}"
num_workers: ${var.cluster_workers}
targets:
default:
default: true
variables:
cluster_workers: 1
cluster:
node_type_id: "default"
cluster_key: "default"
without_defaults:
complex_to_string:
variables:
cluster_workers: 1
cluster:
node_type_id: "default"
cluster_key: "default"
string_to_complex:
variables:
cluster_workers: 1
cluster:
node_type_id: "default"
cluster_key: "default"
wrong_file_structure:
invalid_json:
with_value:
variables:
cluster_workers: 1
cluster:
node_type_id: "default"
cluster_key: cluster_key_value

View File

@ -0,0 +1,82 @@
=== variable file
>>> $CLI bundle validate -o json
{
"job_cluster_key": "mlops_stacks-cluster",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2
}
}
=== variable file and variable flag
>>> $CLI bundle validate -o json --var=cluster_key=mlops_stacks-cluster-overriden
{
"job_cluster_key": "mlops_stacks-cluster-overriden",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2
}
}
=== variable file and environment variable
>>> BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json
{
"job_cluster_key": "mlops_stacks-cluster-overriden",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2
}
}
=== variable has value in config file
>>> $CLI bundle validate -o json --target with_value
{
"job_cluster_key": "mlops_stacks-cluster-from-file",
"new_cluster": {
"node_type_id": "default",
"num_workers": 1
}
}
=== file has variable that is complex but default is string
>>> errcode $CLI bundle validate -o json --target complex_to_string
Error: variable cluster_key is not of type complex, but the value in the variable file is a complex type
Exit code: 1
{
"job_cluster_key": "${var.cluster_key}",
"new_cluster": {
"node_type_id": "${var.cluster.node_type_id}",
"num_workers": "${var.cluster_workers}"
}
}
=== file has variable that is string but default is complex
>>> errcode $CLI bundle validate -o json --target string_to_complex
Error: variable cluster is of type complex, but the value in the variable file is not a complex type
Exit code: 1
{
"job_cluster_key": "${var.cluster_key}",
"new_cluster": {
"node_type_id": "${var.cluster.node_type_id}",
"num_workers": "${var.cluster_workers}"
}
}
=== variable is required but it's not provided in the file
>>> errcode $CLI bundle validate -o json --target without_defaults
Error: no value assigned to required variable cluster. Assignment can be done using "--var", by setting the BUNDLE_VAR_cluster environment variable, or in .databricks/bundle/<target>/variable-overrides.json file
Exit code: 1
{
"job_cluster_key": "${var.cluster_key}",
"new_cluster": {
"node_type_id": "${var.cluster.node_type_id}",
"num_workers": "${var.cluster_workers}"
}
}

View File

@ -0,0 +1,30 @@
cluster_expr=".resources.jobs.job1.job_clusters[0]"
# defaults from variable file, see .databricks/bundle/<target>/variable-overrides.json
title "variable file"
trace $CLI bundle validate -o json | jq $cluster_expr
title "variable file and variable flag"
trace $CLI bundle validate -o json --var="cluster_key=mlops_stacks-cluster-overriden" | jq $cluster_expr
title "variable file and environment variable"
trace BUNDLE_VAR_cluster_key=mlops_stacks-cluster-overriden $CLI bundle validate -o json | jq $cluster_expr
title "variable has value in config file"
trace $CLI bundle validate -o json --target with_value | jq $cluster_expr
# title "file cannot be parsed"
# trace errcode $CLI bundle validate -o json --target invalid_json | jq $cluster_expr
# title "file has wrong structure"
# trace errcode $CLI bundle validate -o json --target wrong_file_structure | jq $cluster_expr
title "file has variable that is complex but default is string"
trace errcode $CLI bundle validate -o json --target complex_to_string | jq $cluster_expr
title "file has variable that is string but default is complex"
trace errcode $CLI bundle validate -o json --target string_to_complex | jq $cluster_expr
title "variable is required but it's not provided in the file"
trace errcode $CLI bundle validate -o json --target without_defaults | jq $cluster_expr

View File

@ -0,0 +1,24 @@
workspace:
profile: profile_name
root_path: ${var.workspace_root}/path/to/root
variables:
workspace_root:
description: "root directory in the Databricks workspace to store the asset bundle and associated artifacts"
default: /Users/${workspace.current_user.userName}
targets:
dev:
default: true
prod:
variables:
workspace_root: /Shared
resources:
jobs:
my_job:
tasks:
- existing_cluster_id: 500
python_wheel_task:
named_parameters:
conf-file: "${workspace.file_path}/path/to/config.yaml"

View File

@ -0,0 +1,67 @@
/Workspace should be prepended on all paths, but it is not the case:
{
"bundle": {
"environment": "dev",
"git": {
"bundle_root_path": ".",
"inferred": true
},
"target": "dev",
"terraform": {
"exec_path": "$TMPHOME"
}
},
"resources": {
"jobs": {
"my_job": {
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Users/$USERNAME/path/to/root/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
"permissions": [],
"queue": {
"enabled": true
},
"tags": {},
"tasks": [
{
"existing_cluster_id": "500",
"python_wheel_task": {
"named_parameters": {
"conf-file": "/Users/$USERNAME/path/to/root/files/path/to/config.yaml"
}
},
"task_key": ""
}
]
}
}
},
"sync": {
"paths": [
"."
]
},
"targets": null,
"variables": {
"workspace_root": {
"default": "/Users/$USERNAME",
"description": "root directory in the Databricks workspace to store the asset bundle and associated artifacts",
"value": "/Users/$USERNAME"
}
},
"workspace": {
"artifact_path": "/Users/$USERNAME/path/to/root/artifacts",
"current_user": {
"short_name": "$USERNAME",
"userName": "$USERNAME"
},
"file_path": "/Users/$USERNAME/path/to/root/files",
"profile": "profile_name",
"resource_path": "/Users/$USERNAME/path/to/root/resources",
"root_path": "/Users/$USERNAME/path/to/root",
"state_path": "/Users/$USERNAME/path/to/root/state"
}
}

View File

@ -0,0 +1,2 @@
echo /Workspace should be prepended on all paths, but it is not the case: #2181
$CLI bundle validate -o json

View File

@ -3,7 +3,7 @@
"abc def"
>>> errcode $CLI bundle validate
Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable
Error: no value assigned to required variable b. Assignment can be done using "--var", by setting the BUNDLE_VAR_b environment variable, or in .databricks/bundle/<target>/variable-overrides.json file
Name: ${var.a} ${var.b}
Target: default

View File

@ -0,0 +1,73 @@
package acceptance_test
import (
"encoding/json"
"net/http"
"os"
"strings"
"testing"
"github.com/databricks/cli/internal/testcli"
"github.com/stretchr/testify/require"
)
// StartCmdServer starts a test server whose root handler runs a CLI command
// through testcli.NewRunner and reports the outcome.
//
// The handler reads three query parameters from the request:
//   - args: the command line, split on single spaces
//   - env: a JSON object of environment variables to set while the command runs
//   - cwd: the directory to switch to while the command runs (must be non-empty)
//
// The response holds the captured "stdout" and "stderr" plus an "exitcode"
// that is 1 when the command returned an error and 0 otherwise. Environment
// and working-directory changes are reverted before the handler returns.
func StartCmdServer(t *testing.T) *TestServer {
	server := StartServer(t)

	server.Handle("/", func(r *http.Request) (any, error) {
		query := r.URL.Query()
		args := strings.Split(query.Get("args"), " ")

		var env map[string]string
		err := json.Unmarshal([]byte(query.Get("env")), &env)
		require.NoError(t, err)

		// The deferred restores fire when the handler returns, not at the end
		// of each iteration, so every variable stays set while the command runs.
		for name, value := range env {
			restoreEnv := Setenv(t, name, value)
			defer restoreEnv()
		}

		restoreDir := Chdir(t, query.Get("cwd"))
		defer restoreDir()

		runner := testcli.NewRunner(t, r.Context(), args...)
		runner.Verbose = false
		stdout, stderr, runErr := runner.Run()

		// Any error from the command is reported as exit code 1.
		exitcode := 0
		if runErr != nil {
			exitcode = 1
		}

		return map[string]any{
			"stdout":   stdout.String(),
			"stderr":   stderr.String(),
			"exitcode": exitcode,
		}, nil
	})

	return server
}
// Chdir switches the process working directory to cwd and returns a function
// that switches back to the directory that was current at the time of the call.
//
// It is meant to be used with defer so that the previous directory is restored
// when the calling function ends, unlike testutil.Chdir which restores it only
// when the test ends. An empty cwd fails the test.
func Chdir(t *testing.T, cwd string) func() {
	require.NotEmpty(t, cwd)

	original, err := os.Getwd()
	require.NoError(t, err)

	require.NoError(t, os.Chdir(cwd))

	// Restoring is best effort: the error from switching back is ignored.
	restore := func() {
		_ = os.Chdir(original)
	}
	return restore
}
// Setenv sets the environment variable key to value and returns a function
// that puts the environment back: the previous value is restored if the
// variable was set before, otherwise the variable is unset.
//
// It is meant to be used with defer so that the change is reverted when the
// calling function ends, unlike t.Setenv which reverts only when the test ends.
func Setenv(t *testing.T, key, value string) func() {
	previous, wasSet := os.LookupEnv(key)
	require.NoError(t, os.Setenv(key, value))

	// Restoring is best effort: errors from the os calls are ignored.
	if !wasSet {
		return func() {
			_ = os.Unsetenv(key)
		}
	}
	return func() {
		_ = os.Setenv(key, previous)
	}
}

104
acceptance/config_test.go Normal file
View File

@ -0,0 +1,104 @@
package acceptance_test
import (
"os"
"path/filepath"
"sync"
"testing"
"github.com/BurntSushi/toml"
"github.com/databricks/cli/libs/testdiff"
"github.com/stretchr/testify/require"
)
const configFilename = "test.toml"
var (
configCache map[string]TestConfig
configMutex sync.Mutex
)
// TestConfig is the structure that a test.toml file is decoded into.
// DoLoadConfig rejects files containing keys that do not map to a field here.
type TestConfig struct {
// Place to describe what's wrong with this test. Does not affect how the test is run.
Badness string
// Which OSes the test is enabled on. Each string is compared against runtime.GOOS.
// If absent, default to true.
GOOS map[string]bool
// List of additional replacements to apply on this test.
// Old is a regexp, New is a replacement expression.
Repls []testdiff.Replacement
}
// FindConfig finds the closest config file.
//
// It looks for configFilename in dir and then in each parent directory in
// turn. It returns the path of the first config file found and a flag that is
// false when the file was found in dir itself and true when it was found in
// one of its parents (i.e. the config may be shared with other tests).
//
// The test fails if no config file is found, or if checking for one fails
// with an error other than "does not exist".
func FindConfig(t *testing.T, dir string) (string, bool) {
	shared := false
	for {
		path := filepath.Join(dir, configFilename)
		_, err := os.Stat(path)

		if err == nil {
			return path, shared
		}

		// Anything found from here on lives in a parent directory.
		shared = true

		if dir == "" || dir == "." {
			break
		}

		if !os.IsNotExist(err) {
			t.Fatalf("Error while reading %s: %s", path, err)
		}

		// For an absolute path filepath.Dir eventually returns the root
		// unchanged; stop there instead of looping forever.
		parent := filepath.Dir(dir)
		if parent == dir {
			break
		}
		dir = parent
	}

	t.Fatal("Config not found: " + configFilename)
	return "", shared
}
// LoadConfig loads the closest config file for dir and returns it together
// with the path it was loaded from. Non-leaf configs are cached.
//
// A config found in dir itself (leaf) is loaded directly on every call. A
// config found in a parent directory is loaded once and then served from
// configCache, which is guarded by configMutex.
func LoadConfig(t *testing.T, dir string) (TestConfig, string) {
	// FindConfig reports whether the config came from a parent directory.
	path, shared := FindConfig(t, dir)

	if !shared {
		return DoLoadConfig(t, path), path
	}

	configMutex.Lock()
	defer configMutex.Unlock()

	if configCache == nil {
		configCache = make(map[string]TestConfig)
	}

	if cached, ok := configCache[path]; ok {
		return cached, path
	}

	result := DoLoadConfig(t, path)
	configCache[path] = result
	return result, path
}
// DoLoadConfig reads the file at path and decodes it as TOML into a TestConfig.
//
// The test fails if the file cannot be read, if it is not valid TOML, or if it
// contains keys that were not decoded into TestConfig.
func DoLoadConfig(t *testing.T, path string) TestConfig {
	content, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("failed to read config: %s", readErr)
	}

	var config TestConfig
	meta, decodeErr := toml.Decode(string(content), &config)
	require.NoError(t, decodeErr)

	// Leftover keys are rejected rather than silently ignored.
	if undecoded := meta.Undecoded(); len(undecoded) > 0 {
		t.Fatalf("Undecoded keys in %s: %#v", path, undecoded)
	}

	return config
}

View File

@ -115,7 +115,6 @@ Marketplace
Apps
apps Apps run directly on a customers Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
apps Apps run directly on a customers Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
Clean Rooms
clean-room-assets Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room.

View File

@ -6,7 +6,9 @@ errcode() {
local exit_code=$?
# Re-enable 'set -e' if it was previously set
set -e
>&2 printf "\nExit code: $exit_code\n"
if [ $exit_code -ne 0 ]; then
>&2 printf "\nExit code: $exit_code\n"
fi
}
trace() {
@ -34,9 +36,25 @@ trace() {
git-repo-init() {
git init -qb main
git config --global core.autocrlf false
git config core.autocrlf false
git config user.name "Tester"
git config user.email "tester@databricks.com"
git add databricks.yml
git commit -qm 'Add databricks.yml'
}
# title prints a section header of the form "=== <label>".
# The header is preceded by a newline and is not followed by one.
# Usage: title <label>
title() {
local label="$1"
printf "\n=== %s" "$label"
}
# withdir runs a command inside the given directory and then changes back to
# the directory that was current when it was called.
# Usage: withdir <dir> <command> [args...]
# Returns the command's exit code. If changing into <dir> or changing back
# fails, the exit code of that cd is returned instead.
withdir() {
local dir="$1"
shift
local orig_dir="$(pwd)"
cd "$dir" || return $?
"$@"
# Capture the command's status before cd overwrites $?.
local exit_code=$?
cd "$orig_dir" || return $?
return $exit_code
}

View File

@ -0,0 +1 @@
HELLO

View File

@ -0,0 +1,35 @@
=== Capturing STDERR
>>> python3 -c import sys; sys.stderr.write("STDERR\n")
STDERR
=== Capturing STDOUT
>>> python3 -c import sys; sys.stderr.write("STDOUT\n")
STDOUT
=== Capturing exit code
>>> errcode python3 -c raise SystemExit(5)
Exit code: 5
=== Capturing exit code (alt)
>>> python3 -c raise SystemExit(7)
Exit code: 7
=== Capturing pwd
>>> python3 -c import os; print(os.getcwd())
$TMPDIR
=== Capturing subdir
>>> mkdir -p subdir/a/b/c
>>> withdir subdir/a/b/c python3 -c import os; print(os.getcwd())
$TMPDIR/subdir/a/b/c
=== Custom output files - everything starting with out is captured and compared
>>> echo HELLO
=== Custom regex can be specified in [[Repl]] section
1234
CUSTOM_NUMBER_REGEX
123456

View File

@ -0,0 +1,26 @@
printf "=== Capturing STDERR"
trace python3 -c 'import sys; sys.stderr.write("STDERR\n")'
printf "\n=== Capturing STDOUT"
trace python3 -c 'import sys; sys.stderr.write("STDOUT\n")'
printf "\n=== Capturing exit code"
trace errcode python3 -c 'raise SystemExit(5)'
printf "\n=== Capturing exit code (alt)"
errcode trace python3 -c 'raise SystemExit(7)'
printf "\n=== Capturing pwd"
trace python3 -c 'import os; print(os.getcwd())'
printf "\n=== Capturing subdir"
trace mkdir -p subdir/a/b/c
trace withdir subdir/a/b/c python3 -c 'import os; print(os.getcwd())'
printf "\n=== Custom output files - everything starting with out is captured and compared"
trace echo HELLO > out.hello.txt
printf "\n=== Custom regex can be specified in [[Repl]] section\n"
echo 1234
echo 12345
echo 123456

View File

@ -0,0 +1,20 @@
# Badness = "Brief description of what's wrong with the test output, if anything"
#[GOOS]
# Disable on Windows
#windows = false
# Disable on macOS (keys are compared against runtime.GOOS, which is "darwin" there)
#darwin = false
# Disable on Linux
#linux = false
[[Repls]]
Old = '\b[0-9]{5}\b'
New = "CUSTOM_NUMBER_REGEX"
[[Repls]]
# Fix paths with backslashes in the output on Windows.
Old = '\$TMPDIR\\subdir\\a\\b\\c'
New = '$$TMPDIR/subdir/a/b/c'

View File

@ -68,7 +68,7 @@ func StartServer(t *testing.T) *TestServer {
}
func AddHandlers(server *TestServer) {
server.Handle("/api/2.0/policies/clusters/list", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.0/policies/clusters/list", func(r *http.Request) (any, error) {
return compute.ListPoliciesResponse{
Policies: []compute.Policy{
{
@ -83,7 +83,7 @@ func AddHandlers(server *TestServer) {
}, nil
})
server.Handle("/api/2.0/instance-pools/list", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.0/instance-pools/list", func(r *http.Request) (any, error) {
return compute.ListInstancePools{
InstancePools: []compute.InstancePoolAndStats{
{
@ -94,7 +94,7 @@ func AddHandlers(server *TestServer) {
}, nil
})
server.Handle("/api/2.1/clusters/list", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.1/clusters/list", func(r *http.Request) (any, error) {
return compute.ListClustersResponse{
Clusters: []compute.ClusterDetails{
{
@ -109,13 +109,13 @@ func AddHandlers(server *TestServer) {
}, nil
})
server.Handle("/api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.0/preview/scim/v2/Me", func(r *http.Request) (any, error) {
return iam.User{
UserName: "tester@databricks.com",
}, nil
})
server.Handle("/api/2.0/workspace/get-status", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.0/workspace/get-status", func(r *http.Request) (any, error) {
return workspace.ObjectInfo{
ObjectId: 1001,
ObjectType: "DIRECTORY",
@ -124,13 +124,13 @@ func AddHandlers(server *TestServer) {
}, nil
})
server.Handle("/api/2.1/unity-catalog/current-metastore-assignment", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.1/unity-catalog/current-metastore-assignment", func(r *http.Request) (any, error) {
return catalog.MetastoreAssignment{
DefaultCatalogName: "main",
}, nil
})
server.Handle("/api/2.0/permissions/directories/1001", func(r *http.Request) (any, error) {
server.Handle("GET /api/2.0/permissions/directories/1001", func(r *http.Request) (any, error) {
return workspace.WorkspaceObjectPermissions{
ObjectId: "1001",
ObjectType: "DIRECTORY",

2
acceptance/test.toml Normal file
View File

@ -0,0 +1,2 @@
# If neither the test directory nor any of its parents has a test.toml, this file serves as the fallback configuration.
# The configurations are not merged across parents; the closest one is used fully.

View File

@ -17,6 +17,7 @@ import (
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/bundle/metadata"
"github.com/databricks/cli/libs/auth"
"github.com/databricks/cli/libs/fileset"
"github.com/databricks/cli/libs/locker"
"github.com/databricks/cli/libs/log"
@ -24,7 +25,6 @@ import (
"github.com/databricks/cli/libs/terraform"
"github.com/databricks/cli/libs/vfs"
"github.com/databricks/databricks-sdk-go"
sdkconfig "github.com/databricks/databricks-sdk-go/config"
"github.com/hashicorp/terraform-exec/tfexec"
)
@ -242,21 +242,5 @@ func (b *Bundle) AuthEnv() (map[string]string, error) {
}
cfg := b.client.Config
out := make(map[string]string)
for _, attr := range sdkconfig.ConfigAttributes {
// Ignore profile so that downstream tools don't try and reload
// the profile even though we know the current configuration is valid.
if attr.Name == "profile" {
continue
}
if len(attr.EnvVars) == 0 {
continue
}
if attr.IsZero(cfg) {
continue
}
out[attr.EnvVars[0]] = attr.GetString(cfg)
}
return out, nil
return auth.Env(cfg), nil
}

View File

@ -2,6 +2,7 @@ package loader
import (
"context"
"fmt"
"path/filepath"
"slices"
"strings"
@ -36,6 +37,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag.
// Maintain list of files in order of files being loaded.
// This is stored in the bundle configuration for observability.
var files []string
var diags diag.Diagnostics
// For each glob, find all files to load.
// Ordering of the list of globs is maintained in the output.
@ -60,7 +62,7 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag.
// Filter matches to ones we haven't seen yet.
var includes []string
for _, match := range matches {
for i, match := range matches {
rel, err := filepath.Rel(b.BundleRootPath, match)
if err != nil {
return diag.FromErr(err)
@ -69,9 +71,22 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) diag.
continue
}
seen[rel] = true
if filepath.Ext(rel) != ".yaml" && filepath.Ext(rel) != ".yml" {
diags = diags.Append(diag.Diagnostic{
Severity: diag.Error,
Summary: "Files in the 'include' configuration section must be YAML files.",
Detail: fmt.Sprintf("The file %s in the 'include' configuration section is not a YAML file, and only YAML files are supported. To include files to sync, specify them in the 'sync.include' configuration section instead.", rel),
Locations: b.Config.GetLocations(fmt.Sprintf("include[%d]", i)),
})
continue
}
includes = append(includes, rel)
}
if len(diags) > 0 {
return diags
}
// Add matches to list of mutators to return.
slices.Sort(includes)
files = append(files, includes...)

View File

@ -72,17 +72,18 @@ func (m *applySourceLinkedDeploymentPreset) Apply(ctx context.Context, b *bundle
return diags
}
// This mutator runs before workspace paths are defaulted so it's safe to check for the user-defined value
if b.Config.Workspace.FilePath != "" && config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) {
path := dyn.NewPath(dyn.Key("targets"), dyn.Key(target), dyn.Key("workspace"), dyn.Key("file_path"))
path := dyn.NewPath(dyn.Key("workspace"), dyn.Key("file_path"))
diags = diags.Append(
diag.Diagnostic{
Severity: diag.Warning,
Summary: "workspace.file_path setting will be ignored in source-linked deployment mode",
Detail: "In source-linked deployment files are not copied to the destination and resources use source files instead",
Paths: []dyn.Path{
path[2:],
path,
},
Locations: b.Config.GetLocations(path[2:].String()),
Locations: b.Config.GetLocations(path.String()),
},
)
}

View File

@ -32,7 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn
}
if info.WorktreeRoot == "" {
b.WorktreeRoot = b.BundleRoot
b.WorktreeRoot = b.SyncRoot
} else {
b.WorktreeRoot = vfs.MustNew(info.WorktreeRoot)
}

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/libs/dyn"
)
// pythonDiagnostic is a single entry in diagnostics.json
type pythonDiagnostic struct {
Severity pythonSeverity `json:"severity"`
Summary string `json:"summary"`

View File

@ -0,0 +1,194 @@
package python
import (
"encoding/json"
"fmt"
"io"
"path/filepath"
"github.com/databricks/cli/libs/dyn"
)
// generatedFileName is used as the virtual file name for YAML generated by Python code.
//
// mergePythonLocations replaces dyn.Location with generatedFileName with locations loaded
// from locations.json
const generatedFileName = "__generated_by_python__.yml"
// pythonLocations is a data structure for efficient location lookup for a given path
//
// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path.
// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n)
// where n is the number of components in the path.
//
// For example, with locations.json:
//
// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
//
// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5
//
// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5,
// because we use the location of the job as the most precise approximation.
//
// See pythonLocationEntry for the structure of a single entry in locations.json
type pythonLocations struct {
// descendants referenced by key, e.g. '.foo'
keys map[string]*pythonLocations
// descendants referenced by index, e.g. '[0]'
indexes map[int]*pythonLocations
// location for the current node if it exists
location dyn.Location
// if true, location is present
exists bool
}
// pythonLocationEntry is a single entry in locations.json
type pythonLocationEntry struct {
// Path of the value the location applies to, e.g. "resources.jobs.job_0";
// parsed with dyn.NewPathFromString.
Path string `json:"path"`
// File, Line and Column are copied into the corresponding dyn.Location fields.
File string `json:"file"`
Line int `json:"line"`
Column int `json:"column"`
}
// mergePythonLocations walks value and, for every node that has a location
// recorded by the Python mutator, puts that location in front of the node's
// existing locations.
//
// The primary use-case is to merge locations.json with output.json, so that
// validation errors point to Python source code instead of generated YAML.
// Nodes without a recorded location are returned unchanged.
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
	visit := func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
		pythonLocation, found := findPythonLocation(locations, p)
		if !found {
			return v, nil
		}

		// Loaded YAML carries the virtual file path as its location; those
		// entries are dropped because the Python location supersedes them.
		// Any other pre-existing locations are kept, which covers the case
		// where a Python function modified a resource defined in YAML.
		preserved := removeVirtualLocations(v.Locations())

		// The first item in the list is the "last" location used for error
		// reporting, so the Python location goes first.
		merged := make([]dyn.Location, 0, 1+len(preserved))
		merged = append(merged, pythonLocation)
		merged = append(merged, preserved...)

		return v.WithLocations(merged), nil
	}

	return dyn.Walk(value, visit)
}
// removeVirtualLocations returns the given locations without the ones whose
// file base name equals generatedFileName. Order of the remaining locations
// is preserved; the result is nil when nothing remains.
func removeVirtualLocations(locations []dyn.Location) []dyn.Location {
	var kept []dyn.Location

	for i := range locations {
		if filepath.Base(locations[i].File) != generatedFileName {
			kept = append(kept, locations[i])
		}
	}

	return kept
}
// parsePythonLocations parses locations.json from the Python mutator.
//
// The input is a stream of JSON objects (one per line), each with the
// pythonLocationEntry structure. Every entry is inserted into a fresh trie
// under its parsed path.
//
// It returns an error if an entry is not valid JSON or if its path cannot be
// parsed by dyn.NewPathFromString. The underlying error is wrapped, so callers
// can inspect it with errors.Is / errors.As.
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
	decoder := json.NewDecoder(input)
	locations := newPythonLocations()

	for decoder.More() {
		var entry pythonLocationEntry

		if err := decoder.Decode(&entry); err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		path, err := dyn.NewPathFromString(entry.Path)
		if err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		location := dyn.Location{
			File:   entry.File,
			Line:   entry.Line,
			Column: entry.Column,
		}

		putPythonLocation(locations, path, location)
	}

	return locations, nil
}
// putPythonLocation stores location in the trie under the given path,
// creating intermediate nodes as needed. A location previously stored
// under the same path is overwritten.
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
	node := trie

	for _, component := range path {
		var (
			child *pythonLocations
			ok    bool
		)

		// a component with a non-empty key is a key component,
		// anything else is treated as an index component
		if key := component.Key(); key != "" {
			if child, ok = node.keys[key]; !ok {
				child = newPythonLocations()
				node.keys[key] = child
			}
		} else {
			index := component.Index()
			if child, ok = node.indexes[index]; !ok {
				child = newPythonLocations()
				node.indexes[index] = child
			}
		}

		node = child
	}

	node.location = location
	node.exists = true
}
// newPythonLocations returns an empty trie node: both child maps are
// initialized and no location is set.
func newPythonLocations() *pythonLocations {
	node := new(pythonLocations)
	node.keys = map[string]*pythonLocations{}
	node.indexes = map[int]*pythonLocations{}

	return node
}
// findPythonLocation looks up path in the trie and returns the location of
// the deepest node along the path that has a location set: the exact node if
// it has one, otherwise its closest ancestor.
//
// The boolean result is false when neither the path nor any of its ancestors
// (including the root) has a location.
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
	node := locations
	best, found := locations.location, locations.exists

	for _, component := range path {
		var (
			next *pythonLocations
			ok   bool
		)

		if key := component.Key(); key != "" {
			next, ok = node.keys[key]
		} else {
			next, ok = node.indexes[component.Index()]
		}

		// the trie has nothing below this point, keep the best match so far
		if !ok {
			break
		}

		node = next
		if node.exists {
			best, found = node.location, true
		}
	}

	return best, found
}

View File

@ -0,0 +1,179 @@
package python
import (
"bytes"
"path/filepath"
"testing"
"github.com/databricks/cli/libs/diag"
"github.com/stretchr/testify/require"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
)
// TestMergeLocations checks how mergePythonLocations rewrites locations when
// only the path "foo" has a known Python location: "foo" and its children get
// the Python location first, virtual (generated) locations are dropped for
// them, and values outside of "foo" are left untouched.
func TestMergeLocations(t *testing.T) {
pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1}
yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation)
input := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
"baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}),
"qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}),
},
[]dyn.Location{},
),
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)
expected := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
// pythonLocation is prepended to the list, existing non-virtual locations are kept
"baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// generatedLocation is replaced by pythonLocation
"qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
},
[]dyn.Location{pythonLocation},
),
// if location is unknown, we keep it as-is
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
// the root has no Python location either, so it is kept as-is
[]dyn.Location{yamlLocation},
)
actual, err := mergePythonLocations(input, locations)
assert.NoError(t, err)
assert.Equal(t, expected, actual)
}
// TestFindLocation checks that an exact match wins over the location of its parent.
func TestFindLocation(t *testing.T) {
	parentLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	childLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), parentLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), childLocation)

	found, ok := findPythonLocation(trie, dyn.MustPathFromString("foo.bar"))

	assert.True(t, ok)
	assert.Equal(t, childLocation, found)
}
// TestFindLocation_indexPathComponent checks that index components such as
// "[0]" are stored and looked up like key components.
func TestFindLocation_indexPathComponent(t *testing.T) {
	rootLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	listLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}
	itemLocation := dyn.Location{File: "foo.py", Line: 3, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), rootLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), listLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar[0]"), itemLocation)

	found, ok := findPythonLocation(trie, dyn.MustPathFromString("foo.bar[0]"))

	assert.True(t, ok)
	assert.Equal(t, itemLocation, found)
}
// TestFindLocation_closestAncestorLocation checks that a path without its own
// location resolves to the location of its closest ancestor.
func TestFindLocation_closestAncestorLocation(t *testing.T) {
	grandparentLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	parentLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), grandparentLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), parentLocation)

	found, ok := findPythonLocation(trie, dyn.MustPathFromString("foo.bar.baz"))

	assert.True(t, ok)
	assert.Equal(t, parentLocation, found)
}
// TestFindLocation_unknownLocation checks that a path with no location on
// itself or on any ancestor is reported as not found.
func TestFindLocation_unknownLocation(t *testing.T) {
	fooLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	fooBarLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), fooLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), fooBarLocation)

	_, ok := findPythonLocation(trie, dyn.MustPathFromString("bar"))

	assert.False(t, ok)
}
// TestLoadOutput checks that loadOutput applies Python locations to the
// loaded output, except for path-like values (here: notebook_path), which
// keep the location of the virtual generated file inside the bundle root.
func TestLoadOutput(t *testing.T) {
location := dyn.Location{File: "my_job.py", Line: 1, Column: 1}
bundleRoot := t.TempDir()
output := `{
"resources": {
"jobs": {
"my_job": {
"name": "my_job",
"tasks": [
{
"task_key": "my_task",
"notebook_task": {
"notebook_path": "my_notebook"
}
}
]
}
}
}
}`
// only the job itself has a Python location; its descendants inherit it
locations := newPythonLocations()
putPythonLocation(
locations,
dyn.MustPathFromString("resources.jobs.my_job"),
location,
)
value, diags := loadOutput(
bundleRoot,
bytes.NewReader([]byte(output)),
locations,
)
assert.Equal(t, diag.Diagnostics{}, diags)
// a regular field gets the location of its closest ancestor (the job)
name, err := dyn.Get(value, "resources.jobs.my_job.name")
require.NoError(t, err)
require.Equal(t, []dyn.Location{location}, name.Locations())
// until we implement path normalization, we have to keep locations of values
// that change semantic depending on their location
//
// note: it's important to have absolute path including 'bundleRoot'
// because mutator pipeline already has expanded locations into absolute path
notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path")
require.NoError(t, err)
require.Len(t, notebookPath.Locations(), 1)
require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File)
}
// TestParsePythonLocations checks that a single locations.json entry ends up
// in the trie under its path with the file, line and column from the entry.
func TestParsePythonLocations(t *testing.T) {
	const input = `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}`
	want := dyn.Location{File: "foo.py", Line: 1, Column: 2}

	trie, err := parsePythonLocations(bytes.NewBufferString(input))
	assert.NoError(t, err)

	node := trie.keys["foo"]
	assert.True(t, node.exists)
	assert.Equal(t, want, node.location)
}

View File

@ -7,11 +7,14 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"reflect"
"strings"
"github.com/databricks/cli/bundle/config/mutator/paths"
"github.com/databricks/databricks-sdk-go/logger"
"github.com/fatih/color"
@ -124,6 +127,15 @@ type opts struct {
enabled bool
venvPath string
loadLocations bool
}
// runPythonMutatorOpts groups the settings passed to runPythonMutator.
type runPythonMutatorOpts struct {
// cacheDir is the directory where input.json, output.json, diagnostics.json
// and locations.json are placed
cacheDir string
// bundleRootPath is used as the working directory of the Python process
// and as the root path when loading its output
bundleRootPath string
// pythonPath is the Python interpreter to execute
pythonPath string
// loadLocations, if true, passes "--locations" to the Python process
loadLocations bool
}
// getOpts adapts deprecated PyDABs and upcoming Python configuration
@ -148,8 +160,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) {
// don't execute for phases for 'python' section
if phase == PythonMutatorPhaseInit || phase == PythonMutatorPhaseLoad {
return opts{
enabled: true,
venvPath: experimental.PyDABs.VEnvPath,
enabled: true,
venvPath: experimental.PyDABs.VEnvPath,
loadLocations: false, // not supported in PyDABs
}, nil
} else {
return opts{}, nil
@ -158,8 +171,9 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) {
// don't execute for phases for 'pydabs' section
if phase == PythonMutatorPhaseLoadResources || phase == PythonMutatorPhaseApplyMutators {
return opts{
enabled: true,
venvPath: experimental.Python.VEnvPath,
enabled: true,
venvPath: experimental.Python.VEnvPath,
loadLocations: true,
}, nil
} else {
return opts{}, nil
@ -194,7 +208,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
}
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot)
rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
cacheDir: cacheDir,
bundleRootPath: b.BundleRootPath,
pythonPath: pythonPath,
loadLocations: opts.loadLocations,
})
mutateDiags = diags
if diags.HasError() {
return dyn.InvalidValue, mutateDiagsHasError
@ -238,13 +257,14 @@ func createCacheDir(ctx context.Context) (string, error) {
return os.MkdirTemp("", "-python")
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(opts.cacheDir, "input.json")
outputPath := filepath.Join(opts.cacheDir, "output.json")
diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json")
locationsPath := filepath.Join(opts.cacheDir, "locations.json")
args := []string{
pythonPath,
opts.pythonPath,
"-m",
"databricks.bundles.build",
"--phase",
@ -257,6 +277,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
diagnosticsPath,
}
if opts.loadLocations {
args = append(args, "--locations", locationsPath)
}
if err := writeInputFile(inputPath, root); err != nil {
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
}
@ -271,7 +295,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
_, processErr := process.Background(
ctx,
args,
process.WithDir(rootPath),
process.WithDir(opts.bundleRootPath),
process.WithStderrWriter(stderrWriter),
process.WithStdoutWriter(stdoutWriter),
)
@ -307,7 +331,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
}
output, outputDiags := loadOutputFile(rootPath, outputPath)
locations, err := loadLocationsFile(locationsPath)
if err != nil {
return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err)
}
output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations)
pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)
// we pass through pythonDiagnostic because it contains warnings
@ -351,7 +380,21 @@ func writeInputFile(inputPath string, input dyn.Value) error {
return os.WriteFile(inputPath, rootConfigJson, 0o600)
}
func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
// loadLocationsFile reads locations.json, which holds source locations for
// the generated YAML, and parses it into a trie.
//
// A missing file is not an error: an empty trie is returned instead. Any
// other failure to open the file is returned wrapped.
func loadLocationsFile(locationsPath string) (*pythonLocations, error) {
	file, err := os.Open(locationsPath)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		return newPythonLocations(), nil
	case err != nil:
		return nil, fmt.Errorf("failed to open locations file: %w", err)
	}
	defer file.Close()

	return parsePythonLocations(file)
}
func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
outputFile, err := os.Open(outputPath)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
@ -359,15 +402,19 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
defer outputFile.Close()
return loadOutput(rootPath, outputFile, locations)
}
func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
// we need absolute path because later parts of pipeline assume all paths are absolute
// and this file will be used as location to resolve relative paths.
//
// virtualPath has to stay in rootPath, because locations outside root path are not allowed:
// virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed:
//
// Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path
//
// for that, we pass virtualPath instead of outputPath as file location
virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_python__.yml"))
virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName))
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
}
@ -377,7 +424,29 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
}
return strictNormalize(config.Root{}, generated)
// paths are resolved relative to locations of their values, if we change location
// we have to update each path, until we simplify that, we don't update locations
// for such values, so we don't change how paths are resolved
//
// we can remove this once we:
// - add variable interpolation before and after PythonMutator
// - implement path normalization (aka path normal form)
_, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
putPythonLocation(locations, p, v.Location())
return v, nil
})
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
// generated has dyn.Location as if it comes from generated YAML file
// earlier we loaded locations.json with source locations in Python code
generatedWithLocations, err := mergePythonLocations(generated, locations)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
return strictNormalize(config.Root{}, generatedWithLocations)
}
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {

View File

@ -7,7 +7,6 @@ import (
"os"
"os/exec"
"path/filepath"
"reflect"
"runtime"
"testing"
@ -93,6 +92,8 @@ func TestPythonMutator_loadResources(t *testing.T) {
}
}`,
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
`{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`,
)
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
@ -110,6 +111,25 @@ func TestPythonMutator_loadResources(t *testing.T) {
assert.Equal(t, "job_1", job1.Name)
}
// output of locations.json should be applied to underlying dyn.Value
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name"))
if err != nil {
return dyn.InvalidValue, err
}
assert.Equal(t, []dyn.Location{
{
File: "src/examples/job1.py",
Line: 5,
Column: 7,
},
}, name1.Locations())
return v, nil
})
assert.NoError(t, err)
assert.Equal(t, 1, len(diags))
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
assert.Equal(t, []dyn.Location{
@ -157,7 +177,7 @@ func TestPythonMutator_loadResources_disallowed(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
diag := bundle.Apply(ctx, b, mutator)
@ -202,7 +222,7 @@ func TestPythonMutator_applyMutators(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseApplyMutators)
diag := bundle.Apply(ctx, b, mutator)
@ -224,7 +244,7 @@ func TestPythonMutator_applyMutators(t *testing.T) {
description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description"))
require.NoError(t, err)
expectedVirtualPath, err := filepath.Abs("__generated_by_python__.yml")
expectedVirtualPath, err := filepath.Abs(generatedFileName)
require.NoError(t, err)
assert.Equal(t, expectedVirtualPath, description.Location().File)
@ -263,7 +283,7 @@ func TestPythonMutator_badOutput(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
diag := bundle.Apply(ctx, b, mutator)
@ -312,7 +332,7 @@ func TestGetOps_Python(t *testing.T) {
}, PythonMutatorPhaseLoadResources)
assert.NoError(t, err)
assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual)
assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: true}, actual)
}
func TestGetOps_PyDABs(t *testing.T) {
@ -328,7 +348,7 @@ func TestGetOps_PyDABs(t *testing.T) {
}, PythonMutatorPhaseInit)
assert.NoError(t, err)
assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual)
assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: false}, actual)
}
func TestGetOps_empty(t *testing.T) {
@ -661,7 +681,7 @@ or activate the environment before running CLI commands:
assert.Equal(t, expected, out)
}
func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context {
func withProcessStub(t *testing.T, args []string, output, diagnostics, locations string) context.Context {
ctx := context.Background()
ctx, stub := process.WithStub(ctx)
@ -673,32 +693,51 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
locationsPath := filepath.Join(cacheDir, "locations.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
args = append(args, "--input", inputPath)
args = append(args, "--output", outputPath)
args = append(args, "--diagnostics", diagnosticsPath)
stub.WithCallback(func(actual *exec.Cmd) error {
_, err := os.Stat(inputPath)
assert.NoError(t, err)
if reflect.DeepEqual(actual.Args, args) {
err := os.WriteFile(outputPath, []byte(output), 0o600)
require.NoError(t, err)
actualInputPath := getArg(actual.Args, "--input")
actualOutputPath := getArg(actual.Args, "--output")
actualDiagnosticsPath := getArg(actual.Args, "--diagnostics")
actualLocationsPath := getArg(actual.Args, "--locations")
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
require.NoError(t, err)
require.Equal(t, inputPath, actualInputPath)
require.Equal(t, outputPath, actualOutputPath)
require.Equal(t, diagnosticsPath, actualDiagnosticsPath)
return nil
} else {
return fmt.Errorf("unexpected command: %v", actual.Args)
// locations is an optional argument
if locations != "" {
require.Equal(t, locationsPath, actualLocationsPath)
err = os.WriteFile(locationsPath, []byte(locations), 0o600)
require.NoError(t, err)
}
err = os.WriteFile(outputPath, []byte(output), 0o600)
require.NoError(t, err)
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
require.NoError(t, err)
return nil
})
return ctx
}
// getArg returns the value that follows the first occurrence of the flag
// name in args, e.g. "out.json" for name "--output" in
// ["--output", "out.json"].
//
// It returns an empty string when the flag is absent, and also when the flag
// is the last element and therefore has no value (instead of panicking with
// an index out of range).
func getArg(args []string, name string) string {
	// stop one element early: the last element can never be followed by a value
	for i := 0; i+1 < len(args); i++ {
		if args[i] == name {
			return args[i+1]
		}
	}

	return ""
}
func loadYaml(name, content string) *bundle.Bundle {
v, diag := config.LoadFromBytes(name, []byte(content))

View File

@ -3,11 +3,14 @@ package mutator
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/variable"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/jsonloader"
"github.com/databricks/cli/libs/env"
)
@ -23,7 +26,11 @@ func (m *setVariables) Name() string {
return "SetVariables"
}
func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string) (dyn.Value, error) {
// getDefaultVariableFilePath returns the slash-separated path of the
// variable overrides file for the given target:
// .databricks/bundle/<target>/variable-overrides.json
func getDefaultVariableFilePath(target string) string {
	const (
		prefix = ".databricks/bundle/"
		suffix = "/variable-overrides.json"
	)

	return prefix + target + suffix
}
func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable, name string, fileDefault dyn.Value) (dyn.Value, error) {
// case: variable already has value initialized, so skip
if variable.HasValue() {
return v, nil
@ -49,6 +56,26 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable,
return v, nil
}
// case: Set the variable to the default value from the variable file
if fileDefault.Kind() != dyn.KindInvalid && fileDefault.Kind() != dyn.KindNil {
hasComplexType := variable.IsComplex()
hasComplexValue := fileDefault.Kind() == dyn.KindMap || fileDefault.Kind() == dyn.KindSequence
if hasComplexType && !hasComplexValue {
return dyn.InvalidValue, fmt.Errorf(`variable %s is of type complex, but the value in the variable file is not a complex type`, name)
}
if !hasComplexType && hasComplexValue {
return dyn.InvalidValue, fmt.Errorf(`variable %s is not of type complex, but the value in the variable file is a complex type`, name)
}
v, err := dyn.Set(v, "value", fileDefault)
if err != nil {
return dyn.InvalidValue, fmt.Errorf(`failed to assign default value from variable file to variable %s with error: %v`, name, err)
}
return v, nil
}
// case: Set the variable to its default value
if variable.HasDefault() {
vDefault, err := dyn.Get(v, "default")
@ -64,10 +91,43 @@ func setVariable(ctx context.Context, v dyn.Value, variable *variable.Variable,
}
// We should have had a value to set for the variable at this point.
return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Assignment can be done through the "--var" flag or by setting the %s environment variable`, name, bundleVarPrefix+name)
return dyn.InvalidValue, fmt.Errorf(`no value assigned to required variable %s. Assignment can be done using "--var", by setting the %s environment variable, or in %s file`, name, bundleVarPrefix+name, getDefaultVariableFilePath("<target>"))
}
// readVariablesFromFile loads variable values from the variable overrides
// file of the current target (see getDefaultVariableFilePath), resolved
// relative to the bundle root.
//
// It returns dyn.InvalidValue without diagnostics when the file does not
// exist, because the file is optional. Any other failure to read the file, a
// file that is not valid JSON, or a file whose top-level value is not a JSON
// object is reported as an error diagnostic.
func readVariablesFromFile(b *bundle.Bundle) (dyn.Value, diag.Diagnostics) {
	var diags diag.Diagnostics

	filePath := filepath.Join(b.BundleRootPath, getDefaultVariableFilePath(b.Config.Bundle.Target))

	// Read the file directly instead of calling os.Stat first: it avoids a
	// check-then-use race and makes sure that only "file does not exist" is
	// treated as "no overrides", while other errors (e.g. permission denied)
	// are surfaced to the user instead of being silently ignored.
	f, err := os.ReadFile(filePath)
	if os.IsNotExist(err) {
		return dyn.InvalidValue, nil
	}
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to read variables file: %w", err))
	}

	val, err := jsonloader.LoadJSON(f, filePath)
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse variables file %s: %w", filePath, err))
	}

	// variables are looked up by name, so the top-level value has to be an object
	if val.Kind() != dyn.KindMap {
		return dyn.InvalidValue, diags.Append(diag.Diagnostic{
			Severity: diag.Error,
			Summary:  fmt.Sprintf("failed to parse variables file %s: invalid format", filePath),
			Detail:   "Variables file must be a JSON object with the following format:\n{\"var1\": \"value1\", \"var2\": \"value2\"}",
		})
	}

	return val, nil
}
func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
defaults, diags := readVariablesFromFile(b)
if diags.HasError() {
return diags
}
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
return dyn.Map(v, "variables", dyn.Foreach(func(p dyn.Path, variable dyn.Value) (dyn.Value, error) {
name := p[1].Key()
@ -76,9 +136,10 @@ func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
return dyn.InvalidValue, fmt.Errorf(`variable "%s" is not defined`, name)
}
return setVariable(ctx, variable, v, name)
fileDefault, _ := dyn.Get(defaults, name)
return setVariable(ctx, variable, v, name, fileDefault)
}))
})
return diag.FromErr(err)
return diags.Extend(diag.FromErr(err))
}

View File

@ -25,7 +25,7 @@ func TestSetVariableFromProcessEnvVar(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
v, err = setVariable(context.Background(), v, &variable, "foo")
v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
require.NoError(t, err)
err = convert.ToTyped(&variable, v)
@ -43,7 +43,7 @@ func TestSetVariableUsingDefaultValue(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
v, err = setVariable(context.Background(), v, &variable, "foo")
v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
require.NoError(t, err)
err = convert.ToTyped(&variable, v)
@ -65,7 +65,7 @@ func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
v, err = setVariable(context.Background(), v, &variable, "foo")
v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
require.NoError(t, err)
err = convert.ToTyped(&variable, v)
@ -90,7 +90,7 @@ func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
v, err = setVariable(context.Background(), v, &variable, "foo")
v, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
require.NoError(t, err)
err = convert.ToTyped(&variable, v)
@ -107,8 +107,8 @@ func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
_, err = setVariable(context.Background(), v, &variable, "foo")
assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_foo environment variable")
_, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done using \"--var\", by setting the BUNDLE_VAR_foo environment variable, or in .databricks/bundle/<target>/variable-overrides.json file")
}
func TestSetVariablesMutator(t *testing.T) {
@ -157,6 +157,6 @@ func TestSetComplexVariablesViaEnvVariablesIsNotAllowed(t *testing.T) {
v, err := convert.FromTyped(variable, dyn.NilValue)
require.NoError(t, err)
_, err = setVariable(context.Background(), v, &variable, "foo")
_, err = setVariable(context.Background(), v, &variable, "foo", dyn.NilValue)
assert.ErrorContains(t, err, "setting via environment variables (BUNDLE_VAR_foo) is not supported for complex variable foo")
}

View File

@ -392,14 +392,6 @@ func (r *Root) MergeTargetOverrides(name string) error {
return err
}
// If the branch was overridden, we need to clear the inferred flag.
if branch := v.Get("branch"); branch.Kind() != dyn.KindInvalid {
out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.V(false))
if err != nil {
return err
}
}
// Set the merged value.
root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git")), out)
if err != nil {

View File

@ -36,11 +36,12 @@ type Variable struct {
// This field stores the resolved value for the variable. The variable are
// resolved in the following priority order (from highest to lowest)
//
// 1. Command line flag. For example: `--var="foo=bar"`
// 2. Target variable. eg: BUNDLE_VAR_foo=bar
// 3. Default value as defined in the applicable environments block
// 4. Default value defined in variable definition
// 5. Throw error, since if no default value is defined, then the variable
// 1. Command line flag `--var="foo=bar"`
// 2. Environment variable. eg: BUNDLE_VAR_foo=bar
// 3. Load defaults from .databricks/bundle/<target>/variable-overrides.json
// 4. Default value as defined in the applicable targets block
// 5. Default value defined in variable definition
// 6. Throw error, since if no default value is defined, then the variable
// is required
Value VariableValue `json:"value,omitempty" bundle:"readonly"`

View File

@ -54,6 +54,7 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Set file upload destination of the bundle in metadata
b.Metadata.Config.Workspace.FilePath = b.Config.Workspace.FilePath
// In source-linked deployment files are not copied and resources use source files, therefore we use sync path as file path in metadata
if config.IsExplicitlyEnabled(b.Config.Presets.SourceLinkedDeployment) {
b.Metadata.Config.Workspace.FilePath = b.SyncRootPath
}

View File

@ -419,7 +419,7 @@ func TestBundleToTerraformModelServing(t *testing.T) {
src := resources.ModelServingEndpoint{
CreateServingEndpoint: &serving.CreateServingEndpoint{
Name: "name",
Config: serving.EndpointCoreConfigInput{
Config: &serving.EndpointCoreConfigInput{
ServedModels: []serving.ServedModelInput{
{
ModelName: "model_name",
@ -474,7 +474,7 @@ func TestBundleToTerraformModelServingPermissions(t *testing.T) {
// and as such observed the `omitempty` tag.
// The new method leverages [dyn.Value] where any field that is not
// explicitly set is not part of the value.
Config: serving.EndpointCoreConfigInput{
Config: &serving.EndpointCoreConfigInput{
ServedModels: []serving.ServedModelInput{
{
ModelName: "model_name",

View File

@ -54,7 +54,7 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con
return tf.ExecPath, nil
}
binDir, err := b.CacheDir(context.Background(), "bin")
binDir, err := b.CacheDir(ctx, "bin")
if err != nil {
return "", err
}
@ -88,41 +88,43 @@ func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *con
return tf.ExecPath, nil
}
// This function inherits some environment variables for Terraform CLI.
func inheritEnvVars(ctx context.Context, environ map[string]string) error {
var envCopy = []string{
// Include $HOME in set of environment variables to pass along.
home, ok := env.Lookup(ctx, "HOME")
if ok {
environ["HOME"] = home
}
"HOME",
// Include $USERPROFILE in set of environment variables to pass along.
// This variable is used by Azure CLI on Windows to find stored credentials and metadata
userProfile, ok := env.Lookup(ctx, "USERPROFILE")
if ok {
environ["USERPROFILE"] = userProfile
}
"USERPROFILE",
// Include $PATH in set of environment variables to pass along.
// This is necessary to ensure that our Terraform provider can use the
// same auxiliary programs (e.g. `az`, or `gcloud`) as the CLI.
path, ok := env.Lookup(ctx, "PATH")
if ok {
environ["PATH"] = path
}
"PATH",
// Include $AZURE_CONFIG_FILE in set of environment variables to pass along.
// This is set in Azure DevOps by the AzureCLI@2 task.
azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE")
if ok {
environ["AZURE_CONFIG_FILE"] = azureConfigFile
}
"AZURE_CONFIG_FILE",
// Include $TF_CLI_CONFIG_FILE to override terraform provider in development.
// See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration
devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE")
if ok {
environ["TF_CLI_CONFIG_FILE"] = devConfigFile
"TF_CLI_CONFIG_FILE",
// Include $USE_SDK_V2_RESOURCES and $USE_SDK_V2_DATA_SOURCES, these are used to switch back from plugin framework to SDKv2.
// This is used to mitigate issues with resources migrated to the plugin framework, as recommended here:
// https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/troubleshooting#plugin-framework-migration-problems
// It is currently a workaround for deploying quality_monitors
// https://github.com/databricks/terraform-provider-databricks/issues/4229#issuecomment-2520344690
"USE_SDK_V2_RESOURCES",
"USE_SDK_V2_DATA_SOURCES",
}
// This function inherits some environment variables for Terraform CLI.
func inheritEnvVars(ctx context.Context, environ map[string]string) error {
for _, key := range envCopy {
value, ok := env.Lookup(ctx, key)
if ok {
environ[key] = value
}
}
// Map $DATABRICKS_TF_CLI_CONFIG_FILE to $TF_CLI_CONFIG_FILE

View File

@ -17,7 +17,7 @@ func TestConvertModelServingEndpoint(t *testing.T) {
src := resources.ModelServingEndpoint{
CreateServingEndpoint: &serving.CreateServingEndpoint{
Name: "name",
Config: serving.EndpointCoreConfigInput{
Config: &serving.EndpointCoreConfigInput{
ServedModels: []serving.ServedModelInput{
{
ModelName: "model_name",

View File

@ -55,7 +55,7 @@ github.com/databricks/cli/bundle/config.Bundle:
The name of the bundle.
"uuid":
"description": |-
PLACEHOLDER
Reserved. A Universally Unique Identifier (UUID) for the bundle that uniquely identifies the bundle in internal Databricks systems. This is generated when a bundle project is initialized using a Databricks template (using the `databricks bundle init` command).
github.com/databricks/cli/bundle/config.Deployment:
"fail_on_active_runs":
"description": |-

View File

@ -353,12 +353,12 @@ github.com/databricks/cli/bundle/config/resources.MlflowModel:
github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint:
"ai_gateway":
"description": |-
The AI Gateway configuration for the serving endpoint. NOTE: only external model endpoints are supported as of now.
The AI Gateway configuration for the serving endpoint. NOTE: Only external model and provisioned throughput endpoints are currently supported.
"config":
"description": |-
The core config of the serving endpoint.
"name":
"description": |
"description": |-
The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.
An endpoint name can consist of alphanumeric characters, dashes, and underscores.
"rate_limits":
@ -1974,6 +1974,9 @@ github.com/databricks/databricks-sdk-go/service/jobs.SparkJarTask:
Parameters passed to the main method.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"run_as_repl":
"description": |-
Deprecated. A value of `false` is no longer supported.
github.com/databricks/databricks-sdk-go/service/jobs.SparkPythonTask:
"parameters":
"description": |-
@ -2684,27 +2687,36 @@ github.com/databricks/databricks-sdk-go/service/pipelines.TableSpecificConfigScd
github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig:
"ai21labs_api_key":
"description": |-
The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.
The Databricks secret key reference for an AI21 Labs API key. If you
prefer to paste your API key directly, see `ai21labs_api_key_plaintext`.
You must provide an API key using one of the following fields:
`ai21labs_api_key` or `ai21labs_api_key_plaintext`.
"ai21labs_api_key_plaintext":
"description": |-
An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.
An AI21 Labs API key provided as a plaintext string. If you prefer to
reference your key using Databricks Secrets, see `ai21labs_api_key`. You
must provide an API key using one of the following fields:
`ai21labs_api_key` or `ai21labs_api_key_plaintext`.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayConfig:
"guardrails":
"description": |-
Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses.
"inference_table_config":
"description": |-
Configuration for payload logging using inference tables. Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality.
Configuration for payload logging using inference tables.
Use these tables to monitor and audit data being sent to and received from model APIs and to improve model quality.
"rate_limits":
"description": |-
Configuration for rate limits which can be set to limit endpoint traffic.
"usage_tracking_config":
"description": |-
Configuration to enable usage tracking using system tables. These tables allow you to monitor operational usage on endpoints and their associated costs.
Configuration to enable usage tracking using system tables.
These tables allow you to monitor operational usage on endpoints and their associated costs.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParameters:
"invalid_keywords":
"description": |-
List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
List of invalid keywords.
AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
"pii":
"description": |-
Configuration for guardrail PII filter.
@ -2713,15 +2725,14 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailParame
Indicates whether the safety filter is enabled.
"valid_topics":
"description": |-
The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
The list of allowed topics.
Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehavior:
"behavior":
"description": |-
Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.
Configuration for input guardrail filters.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrailPiiBehaviorBehavior:
"_":
"description": |-
Behavior for PII filter. Currently only 'BLOCK' is supported. If 'BLOCK' is set for the input guardrail and the request contains PII, the request is not sent to the model server and 400 status code is returned; if 'BLOCK' is set for the output guardrail and the model response contains PII, the PII info in the response is redacted and 400 status code is returned.
"enum":
- |-
NONE
@ -2737,30 +2748,32 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayGuardrails:
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayInferenceTableConfig:
"catalog_name":
"description": |-
The name of the catalog in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the catalog name.
The name of the catalog in Unity Catalog. Required when enabling inference tables.
NOTE: On update, you have to disable inference table first in order to change the catalog name.
"enabled":
"description": |-
Indicates whether the inference table is enabled.
"schema_name":
"description": |-
The name of the schema in Unity Catalog. Required when enabling inference tables. NOTE: On update, you have to disable inference table first in order to change the schema name.
The name of the schema in Unity Catalog. Required when enabling inference tables.
NOTE: On update, you have to disable inference table first in order to change the schema name.
"table_name_prefix":
"description": |-
The prefix of the table in Unity Catalog. NOTE: On update, you have to disable inference table first in order to change the prefix name.
The prefix of the table in Unity Catalog.
NOTE: On update, you have to disable inference table first in order to change the prefix name.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimit:
"calls":
"description": |-
Used to specify how many calls are allowed for a key within the renewal_period.
"key":
"description": |-
Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.
Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported,
with 'endpoint' being the default if not specified.
"renewal_period":
"description": |-
Renewal period field for a rate limit. Currently, only 'minute' is supported.
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey:
"_":
"description": |-
Key field for a rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.
"enum":
- |-
user
@ -2768,8 +2781,6 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitKey:
endpoint
github.com/databricks/databricks-sdk-go/service/serving.AiGatewayRateLimitRenewalPeriod:
"_":
"description": |-
Renewal period field for a rate limit. Currently, only 'minute' is supported.
"enum":
- |-
minute
@ -2780,26 +2791,43 @@ github.com/databricks/databricks-sdk-go/service/serving.AiGatewayUsageTrackingCo
github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfig:
"aws_access_key_id":
"description": |-
The Databricks secret key reference for an AWS access key ID with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.
The Databricks secret key reference for an AWS access key ID with
permissions to interact with Bedrock services. If you prefer to paste
your API key directly, see `aws_access_key_id_plaintext`. You must provide an API
key using one of the following fields: `aws_access_key_id` or
`aws_access_key_id_plaintext`.
"aws_access_key_id_plaintext":
"description": |-
An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_access_key_id`. You must provide an API key using one of the following fields: `aws_access_key_id` or `aws_access_key_id_plaintext`.
An AWS access key ID with permissions to interact with Bedrock services
provided as a plaintext string. If you prefer to reference your key using
Databricks Secrets, see `aws_access_key_id`. You must provide an API key
using one of the following fields: `aws_access_key_id` or
`aws_access_key_id_plaintext`.
"aws_region":
"description": |-
The AWS region to use. Bedrock has to be enabled there.
"aws_secret_access_key":
"description": |-
The Databricks secret key reference for an AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services. If you prefer to paste your API key directly, see `aws_secret_access_key_plaintext`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.
The Databricks secret key reference for an AWS secret access key paired
with the access key ID, with permissions to interact with Bedrock
services. If you prefer to paste your API key directly, see
`aws_secret_access_key_plaintext`. You must provide an API key using one
of the following fields: `aws_secret_access_key` or
`aws_secret_access_key_plaintext`.
"aws_secret_access_key_plaintext":
"description": |-
An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `aws_secret_access_key`. You must provide an API key using one of the following fields: `aws_secret_access_key` or `aws_secret_access_key_plaintext`.
An AWS secret access key paired with the access key ID, with permissions
to interact with Bedrock services provided as a plaintext string. If you
prefer to reference your key using Databricks Secrets, see
`aws_secret_access_key`. You must provide an API key using one of the
following fields: `aws_secret_access_key` or
`aws_secret_access_key_plaintext`.
"bedrock_provider":
"description": |-
The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
The underlying provider in Amazon Bedrock. Supported values (case
insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedrockProvider:
"_":
"description": |-
The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
"enum":
- |-
anthropic
@ -2812,10 +2840,16 @@ github.com/databricks/databricks-sdk-go/service/serving.AmazonBedrockConfigBedro
github.com/databricks/databricks-sdk-go/service/serving.AnthropicConfig:
"anthropic_api_key":
"description": |-
The Databricks secret key reference for an Anthropic API key. If you prefer to paste your API key directly, see `anthropic_api_key_plaintext`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.
The Databricks secret key reference for an Anthropic API key. If you
prefer to paste your API key directly, see `anthropic_api_key_plaintext`.
You must provide an API key using one of the following fields:
`anthropic_api_key` or `anthropic_api_key_plaintext`.
"anthropic_api_key_plaintext":
"description": |-
The Anthropic API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `anthropic_api_key`. You must provide an API key using one of the following fields: `anthropic_api_key` or `anthropic_api_key_plaintext`.
The Anthropic API key provided as a plaintext string. If you prefer to
reference your key using Databricks Secrets, see `anthropic_api_key`. You
must provide an API key using one of the following fields:
`anthropic_api_key` or `anthropic_api_key_plaintext`.
github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput:
"catalog_name":
"description": |-
@ -2831,42 +2865,58 @@ github.com/databricks/databricks-sdk-go/service/serving.AutoCaptureConfigInput:
The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if the inference table is already enabled.
github.com/databricks/databricks-sdk-go/service/serving.CohereConfig:
"cohere_api_base":
"description": "This is an optional field to provide a customized base URL for the Cohere API. \nIf left unspecified, the standard Cohere base URL is used.\n"
"description": |-
This is an optional field to provide a customized base URL for the Cohere
API. If left unspecified, the standard Cohere base URL is used.
"cohere_api_key":
"description": |-
The Databricks secret key reference for a Cohere API key. If you prefer to paste your API key directly, see `cohere_api_key_plaintext`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.
The Databricks secret key reference for a Cohere API key. If you prefer
to paste your API key directly, see `cohere_api_key_plaintext`. You must
provide an API key using one of the following fields: `cohere_api_key` or
`cohere_api_key_plaintext`.
"cohere_api_key_plaintext":
"description": |-
The Cohere API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `cohere_api_key`. You must provide an API key using one of the following fields: `cohere_api_key` or `cohere_api_key_plaintext`.
The Cohere API key provided as a plaintext string. If you prefer to
reference your key using Databricks Secrets, see `cohere_api_key`. You
must provide an API key using one of the following fields:
`cohere_api_key` or `cohere_api_key_plaintext`.
github.com/databricks/databricks-sdk-go/service/serving.DatabricksModelServingConfig:
"databricks_api_token":
"description": |
The Databricks secret key reference for a Databricks API token that corresponds to a user or service
principal with Can Query access to the model serving endpoint pointed to by this external model.
If you prefer to paste your API key directly, see `databricks_api_token_plaintext`.
You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.
"description": |-
The Databricks secret key reference for a Databricks API token that
corresponds to a user or service principal with Can Query access to the
model serving endpoint pointed to by this external model. If you prefer
to paste your API key directly, see `databricks_api_token_plaintext`. You
must provide an API key using one of the following fields:
`databricks_api_token` or `databricks_api_token_plaintext`.
"databricks_api_token_plaintext":
"description": |
The Databricks API token that corresponds to a user or service
principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
If you prefer to reference your key using Databricks Secrets, see `databricks_api_token`.
You must provide an API key using one of the following fields: `databricks_api_token` or `databricks_api_token_plaintext`.
"description": |-
The Databricks API token that corresponds to a user or service principal
with Can Query access to the model serving endpoint pointed to by this
external model provided as a plaintext string. If you prefer to reference
your key using Databricks Secrets, see `databricks_api_token`. You must
provide an API key using one of the following fields:
`databricks_api_token` or `databricks_api_token_plaintext`.
"databricks_workspace_url":
"description": |
The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
"description": |-
The URL of the Databricks workspace containing the model serving endpoint
pointed to by this external model.
github.com/databricks/databricks-sdk-go/service/serving.EndpointCoreConfigInput:
"auto_capture_config":
"description": |-
Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
Note: this field is deprecated for creating new provisioned throughput endpoints,
or updating existing provisioned throughput endpoints that have never had an inference table configured;
in these cases please use AI Gateway to manage inference tables.
"served_entities":
"description": |-
A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.
The list of served entities under the serving endpoint config.
"served_models":
"description": |-
(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. A serving endpoint can have up to 15 served models.
(Deprecated, use served_entities instead) The list of served models under the serving endpoint config.
"traffic_config":
"description": |-
The traffic config defining how invocations to the serving endpoint should be routed.
The traffic configuration associated with the serving endpoint config.
github.com/databricks/databricks-sdk-go/service/serving.EndpointTag:
"key":
"description": |-
@ -2903,17 +2953,13 @@ github.com/databricks/databricks-sdk-go/service/serving.ExternalModel:
"description": |-
PaLM Config. Only required if the provider is 'palm'.
"provider":
"description": |
The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',
'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.",
"description": |-
The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.
"task":
"description": |-
The task type of the external model.
github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider:
"_":
"description": |
The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic',
'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', and 'palm'.",
"enum":
- |-
ai21labs
@ -2934,70 +2980,114 @@ github.com/databricks/databricks-sdk-go/service/serving.ExternalModelProvider:
github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig:
"private_key":
"description": |-
The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`
The Databricks secret key reference for a private key for the service
account which has access to the Google Cloud Vertex AI Service. See [Best
practices for managing service account keys]. If you prefer to paste your
API key directly, see `private_key_plaintext`. You must provide an API
key using one of the following fields: `private_key` or
`private_key_plaintext`.
[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys
"private_key_plaintext":
"description": |-
The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`.
The private key for the service account which has access to the Google
Cloud Vertex AI Service provided as a plaintext secret. See [Best
practices for managing service account keys]. If you prefer to reference
your key using Databricks Secrets, see `private_key`. You must provide an
API key using one of the following fields: `private_key` or
`private_key_plaintext`.
[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys
"project_id":
"description": |-
This is the Google Cloud project id that the service account is associated with.
This is the Google Cloud project id that the service account is
associated with.
"region":
"description": |-
This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions.
This is the region for the Google Cloud Vertex AI Service. See [supported
regions] for more details. Some models are only available in specific
regions.
[supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations
github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig:
"_":
"description": |-
Configs needed to create an OpenAI model route.
"microsoft_entra_client_id":
"description": |
This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
"description": |-
This field is only required for Azure AD OpenAI and is the Microsoft
Entra Client ID.
"microsoft_entra_client_secret":
"description": |
The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
If you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.
You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.
"description": |-
The Databricks secret key reference for a client secret used for
Microsoft Entra ID authentication. If you prefer to paste your client
secret directly, see `microsoft_entra_client_secret_plaintext`. You must
provide an API key using one of the following fields:
`microsoft_entra_client_secret` or
`microsoft_entra_client_secret_plaintext`.
"microsoft_entra_client_secret_plaintext":
"description": |
The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
If you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.
You must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.
"description": |-
The client secret used for Microsoft Entra ID authentication provided as
a plaintext string. If you prefer to reference your key using Databricks
Secrets, see `microsoft_entra_client_secret`. You must provide an API key
using one of the following fields: `microsoft_entra_client_secret` or
`microsoft_entra_client_secret_plaintext`.
"microsoft_entra_tenant_id":
"description": |
This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
"description": |-
This field is only required for Azure AD OpenAI and is the Microsoft
Entra Tenant ID.
"openai_api_base":
"description": |
This is a field to provide a customized base URL for the OpenAI API.
For Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service
provided by Azure.
For other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.
"description": |-
This is a field to provide a customized base URL for the OpenAI API. For
Azure OpenAI, this field is required, and is the base URL for the Azure
OpenAI API service provided by Azure. For other OpenAI API types, this
field is optional, and if left unspecified, the standard OpenAI base URL
is used.
"openai_api_key":
"description": |-
The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.
The Databricks secret key reference for an OpenAI API key using the
OpenAI or Azure service. If you prefer to paste your API key directly,
see `openai_api_key_plaintext`. You must provide an API key using one of
the following fields: `openai_api_key` or `openai_api_key_plaintext`.
"openai_api_key_plaintext":
"description": |-
The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.
The OpenAI API key using the OpenAI or Azure service provided as a
plaintext string. If you prefer to reference your key using Databricks
Secrets, see `openai_api_key`. You must provide an API key using one of
the following fields: `openai_api_key` or `openai_api_key_plaintext`.
"openai_api_type":
"description": |
This is an optional field to specify the type of OpenAI API to use.
For Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security
access validation protocol. For access token validation, use azure. For authentication using Azure Active
"description": |-
This is an optional field to specify the type of OpenAI API to use. For
Azure OpenAI, this field is required, and adjust this parameter to
represent the preferred security access validation protocol. For access
token validation, use azure. For authentication using Azure Active
Directory (Azure AD), use azuread.
"openai_api_version":
"description": |
This is an optional field to specify the OpenAI API version.
For Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to
utilize, specified by a date.
"description": |-
This is an optional field to specify the OpenAI API version. For Azure
OpenAI, this field is required, and is the version of the Azure OpenAI
service to utilize, specified by a date.
"openai_deployment_name":
"description": |
This field is only required for Azure OpenAI and is the name of the deployment resource for the
Azure OpenAI service.
"description": |-
This field is only required for Azure OpenAI and is the name of the
deployment resource for the Azure OpenAI service.
"openai_organization":
"description": |
This is an optional field to specify the organization in OpenAI or Azure OpenAI.
"description": |-
This is an optional field to specify the organization in OpenAI or Azure
OpenAI.
github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig:
"palm_api_key":
"description": |-
The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.
The Databricks secret key reference for a PaLM API key. If you prefer to
paste your API key directly, see `palm_api_key_plaintext`. You must
provide an API key using one of the following fields: `palm_api_key` or
`palm_api_key_plaintext`.
"palm_api_key_plaintext":
"description": |-
The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.
The PaLM API key provided as a plaintext string. If you prefer to
reference your key using Databricks Secrets, see `palm_api_key`. You must
provide an API key using one of the following fields: `palm_api_key` or
`palm_api_key_plaintext`.
github.com/databricks/databricks-sdk-go/service/serving.RateLimit:
"calls":
"description": |-
@ -3010,8 +3100,6 @@ github.com/databricks/databricks-sdk-go/service/serving.RateLimit:
Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.
github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey:
"_":
"description": |-
Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.
"enum":
- |-
user
@ -3019,8 +3107,6 @@ github.com/databricks/databricks-sdk-go/service/serving.RateLimitKey:
endpoint
github.com/databricks/databricks-sdk-go/service/serving.RateLimitRenewalPeriod:
"_":
"description": |-
Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.
"enum":
- |-
minute
@ -3033,21 +3119,15 @@ github.com/databricks/databricks-sdk-go/service/serving.Route:
The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput:
"entity_name":
"description": |
The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),
or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of
__catalog_name__.__schema_name__.__model_name__.
"entity_version":
"description": |-
The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.
The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**.
"entity_version": {}
"environment_vars":
"description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity.\nNote: this is an experimental feature and subject to change. \nExample entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`"
"description": |-
An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
"external_model":
"description": |
The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled)
can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model,
it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later.
The task type of all external models within an endpoint must be the same.
"description": |-
The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same.
"instance_profile_arn":
"description": |-
ARN of the instance profile that the served entity uses to access AWS resources.
@ -3058,68 +3138,46 @@ github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput:
"description": |-
The minimum tokens per second that the endpoint can scale down to.
"name":
"description": |
The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.
If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other
entities, it defaults to <entity-name>-<entity-version>.
"description": |-
The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
"scale_to_zero_enabled":
"description": |-
Whether the compute resources for the served entity should scale down to zero.
"workload_size":
"description": |
The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.
A single unit of provisioned concurrency can process one request at a time.
Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
"description": |-
The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
"workload_type":
"description": |
The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.
See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
"description": |-
The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput:
"environment_vars":
"description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this model.\nNote: this is an experimental feature and subject to change. \nExample model environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`"
"description": |-
An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
"instance_profile_arn":
"description": |-
ARN of the instance profile that the served model will use to access AWS resources.
ARN of the instance profile that the served entity uses to access AWS resources.
"max_provisioned_throughput":
"description": |-
The maximum tokens per second that the endpoint can scale up to.
"min_provisioned_throughput":
"description": |-
The minimum tokens per second that the endpoint can scale down to.
"model_name":
"description": |
The name of the model in Databricks Model Registry to be served or if the model resides in Unity Catalog, the full name of model,
in the form of __catalog_name__.__schema_name__.__model_name__.
"model_version":
"description": |-
The version of the model in Databricks Model Registry or Unity Catalog to be served.
"model_name": {}
"model_version": {}
"name":
"description": |
The name of a served model. It must be unique across an endpoint. If not specified, this field will default to <model-name>-<model-version>.
A served model name can consist of alphanumeric characters, dashes, and underscores.
"description": |-
The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.
"scale_to_zero_enabled":
"description": |-
Whether the compute resources for the served model should scale down to zero.
Whether the compute resources for the served entity should scale down to zero.
"workload_size":
"description": |
The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.
A single unit of provisioned concurrency can process one request at a time.
Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.
"description": |-
The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
"workload_type":
"description": |
The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.
See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
"description": |-
The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadSize:
"_":
"description": |
The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between.
A single unit of provisioned concurrency can process one request at a time.
Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).
If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.
"enum":
- |-
Small
@ -3129,17 +3187,26 @@ github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkload
Large
github.com/databricks/databricks-sdk-go/service/serving.ServedModelInputWorkloadType:
"_":
"description": |
The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.
See the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
"enum":
- |-
CPU
- |-
GPU_MEDIUM
- |-
GPU_SMALL
- |-
GPU_LARGE
- |-
MULTIGPU_MEDIUM
github.com/databricks/databricks-sdk-go/service/serving.ServingModelWorkloadType:
"_":
"enum":
- |-
CPU
- |-
GPU_MEDIUM
- |-
GPU_SMALL
- |-
GPU_LARGE
- |-

View File

@ -197,3 +197,14 @@ github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger:
"manual":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.ServedEntityInput:
"entity_version":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.ServedModelInput:
"model_name":
"description": |-
PLACEHOLDER
"model_version":
"description": |-
PLACEHOLDER

Some files were not shown because too many files have changed in this diff Show More