Add acceptance tests (#2081)

## Changes
- A new kind of test is added: acceptance tests. See acceptance/README.md
for an explanation.
- A few tests are converted to acceptance tests by moving databricks.yml
to acceptance/ and adding corresponding script files.

As these tests run against the compiled binary and can capture the full output
of a command, they can be useful to support major changes such as
refactoring internal logging / diagnostics or complex variable
interpolation.

These are currently run as part of 'make test' but the intention is to
run them as part of integration tests as well.

### Benefits

- Full binary is tested, exactly as users get it.
  - We're not testing custom set of mutators like many existing tests.
- Not mocking anything, real SDK is used (although the HTTP endpoint is
not a real Databricks env).
- Easy to maintain: output can be updated automatically.
- Can easily set up external env, such as env vars, CLI args,
.databrickscfg location etc.

### Gaps

The tests currently share one test server and there is a single global place to
define handlers. We should have a way for tests to override / add new
handlers.

## Tests
I manually checked that output of new acceptance tests matches previous
asserts.
This commit is contained in:
Denis Bilenko 2025-01-08 13:41:08 +01:00 committed by GitHub
parent 8fd793b605
commit 185bbd28e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
58 changed files with 1462 additions and 464 deletions

View File

@ -1,6 +1,6 @@
default: build
PACKAGES=./libs/... ./internal/... ./cmd/... ./bundle/... .
PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... .
GOTESTSUM_FORMAT ?= pkgname-and-test-fails

19
acceptance/README.md Normal file
View File

@ -0,0 +1,19 @@
Acceptance tests are blackbox tests that are run against the compiled binary.
Currently these tests are run against a "fake" HTTP server pretending to be the Databricks API. However, they will be extended to run against a real environment as regular integration tests.
To author a test,
- Add a new directory under `acceptance`. Any level of nesting is supported.
- Add `databricks.yml` there.
- Add `script` with commands to run, e.g. `$CLI bundle validate`. The test case is recognized by presence of `script`.
The test runner will run the script, capture its output, and compare it with the `output.txt` file in the same directory.
In order to write `output.txt` for the first time or overwrite it with the current output, set `TESTS_OUTPUT=OVERWRITE` env var.
The scripts are run with `bash -euo pipefail`, so any error stops the script. A non-zero exit status is captured in `output.txt` by appending an `Exit code: N` line at the end.
For more complex tests one can also use:
- `errcode` helper: if the command fails with non-zero code, it appends `Exit code: N` to the output but returns success to caller (bash), allowing continuation of script.
- `trace` helper: prints the arguments before executing the command.
- custom output files: redirect output to custom file (it must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`.

View File

@ -0,0 +1,302 @@
package acceptance_test
import (
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"slices"
"sort"
"strings"
"testing"
"time"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/testdiff"
"github.com/stretchr/testify/require"
)
// KeepTmp is true when the KEEP_TMP environment variable is non-empty. In that
// case runTest creates its working directory with os.MkdirTemp under
// os.TempDir()/acceptance instead of using t.TempDir().
var KeepTmp = os.Getenv("KEEP_TMP") != ""
// File names that have a special meaning inside a test directory.
const (
// EntryPointScript is the file whose presence marks a directory as a test case.
EntryPointScript = "script"
// CleanupScript is merged into the final script after the entry point script.
CleanupScript = "script.cleanup"
// PrepareScript is merged into the final script before the entry point script.
PrepareScript = "script.prepare"
)
// Scripts is the set of script file names. CopyDir records these files as
// inputs but does not copy them into the destination directory.
var Scripts = map[string]bool{
EntryPointScript: true,
CleanupScript: true,
PrepareScript: true,
}
// TestAccept builds the CLI binary, starts the test server, redirects API
// access to it through environment variables, and then runs every discovered
// test directory as a parallel subtest.
func TestAccept(t *testing.T) {
	// Test scripts invoke the binary through $CLI.
	cliPath := BuildCLI(t)
	t.Setenv("CLI", cliPath)

	srv := StartServer(t)
	AddHandlers(srv)

	// Send API traffic to the server started above.
	host := fmt.Sprintf("http://127.0.0.1:%d", srv.Port)
	t.Setenv("DATABRICKS_HOST", host)
	t.Setenv("DATABRICKS_TOKEN", "dapi1234")

	// Use a fresh home directory so the user's ~/.databrickscfg is not read.
	t.Setenv(env.HomeEnvVar(), t.TempDir())

	cases := getTests(t)
	require.NotEmpty(t, cases)

	for i := range cases {
		// Each subtest gets its own copy of the directory name.
		dir := cases[i]
		t.Run(dir, func(t *testing.T) {
			t.Parallel()
			runTest(t, dir)
		})
	}
}
// getTests walks the current directory and returns, sorted, the parent
// directory of every entry named EntryPointScript. Any error reported by the
// walk fails the test.
func getTests(t *testing.T) []string {
	found := make([]string, 0, 128)

	visit := func(path string, info os.FileInfo, err error) error {
		if err == nil && filepath.Base(path) == EntryPointScript {
			// An entry called 'script' marks its directory as a test case.
			found = append(found, filepath.Dir(path))
		}
		return err
	}

	require.NoError(t, filepath.Walk(".", visit))
	sort.Strings(found)
	return found
}
// runTest executes the test case stored in dir inside a scratch directory and
// compares what the script produces with the expected files kept in dir.
//
// The steps are:
//  1. Create the scratch directory (kept under os.TempDir()/acceptance when
//     KeepTmp is set, otherwise t.TempDir()).
//  2. Write the merged script and copy the test directory into it via CopyDir,
//     which also classifies entries into inputs and expected outputs.
//  3. Run the script with bash and compare its combined output with output.txt.
//  4. Compare every other expected output file with the file the script wrote.
//  5. Report top-level files in the scratch directory that are neither inputs
//     nor expected outputs.
func runTest(t *testing.T, dir string) {
	var tmpDir string
	var err error
	if KeepTmp {
		tempDirBase := filepath.Join(os.TempDir(), "acceptance")
		// Error deliberately ignored: if the base directory cannot be used,
		// MkdirTemp below fails and reports it.
		_ = os.Mkdir(tempDirBase, 0o755)
		tmpDir, err = os.MkdirTemp(tempDirBase, "")
		require.NoError(t, err)
		t.Logf("Created directory: %s", tmpDir)
	} else {
		tmpDir = t.TempDir()
	}

	scriptContents := readMergedScriptContents(t, dir)
	testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents)

	inputs := make(map[string]bool, 2)
	outputs := make(map[string]bool, 2)
	err = CopyDir(dir, tmpDir, inputs, outputs)
	require.NoError(t, err)

	args := []string{"bash", "-euo", "pipefail", EntryPointScript}
	cmd := exec.Command(args[0], args[1:]...)
	cmd.Dir = tmpDir
	outB, err := cmd.CombinedOutput()

	out := formatOutput(string(outB), err)
	// Replace the binary path with the literal "$CLI" before comparing.
	out = strings.ReplaceAll(out, os.Getenv("CLI"), "$CLI")
	doComparison(t, filepath.Join(dir, "output.txt"), "script output", out)

	for key := range outputs {
		if key == "output.txt" {
			// handled above
			continue
		}
		pathNew := filepath.Join(tmpDir, key)
		newValBytes, err := os.ReadFile(pathNew)
		if err != nil {
			if errors.Is(err, os.ErrNotExist) {
				t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir)
			} else {
				t.Errorf("%s: could not read: %s", key, err)
			}
			continue
		}
		pathExpected := filepath.Join(dir, key)
		doComparison(t, pathExpected, pathNew, string(newValBytes))
	}

	// Make sure there are no unaccounted-for new files.
	files, err := os.ReadDir(tmpDir)
	require.NoError(t, err)

	for _, f := range files {
		name := f.Name()
		if _, ok := inputs[name]; ok {
			continue
		}
		if _, ok := outputs[name]; ok {
			continue
		}
		// Report the file name, not the DirEntry value.
		t.Errorf("Unexpected output: %s", name)
		if strings.HasPrefix(name, "out") {
			// We have a new file starting with "out"
			// Show the contents & support overwrite mode for it:
			pathNew := filepath.Join(tmpDir, name)
			newVal := testutil.ReadFile(t, pathNew)
			doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal)
		}
	}
}
// doComparison asserts that valueNew matches the contents of pathExpected after
// newline normalization of both sides. A missing expected file is read as
// empty. In overwrite mode the expected file is then rewritten with the new
// value, or removed when the new value is empty.
func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) {
	actual := testdiff.NormalizeNewlines(valueNew)
	expected := testdiff.NormalizeNewlines(string(readIfExists(t, pathExpected)))
	testdiff.AssertEqualTexts(t, pathExpected, pathNew, expected, actual)

	if !testdiff.OverwriteMode {
		return
	}

	if actual == "" {
		t.Logf("Removing: %s", pathExpected)
		// Error deliberately ignored, as in the overwrite path there is nothing to keep.
		_ = os.Remove(pathExpected)
		return
	}

	t.Logf("Overwriting: %s", pathExpected)
	testutil.WriteFile(t, pathExpected, actual)
}
// readMergedScriptContents assembles the script that is executed for the test
// in dir. Walking from dir up to the root of the test tree it collects every
// script.prepare and script.cleanup it finds, and joins them with newlines as:
//
//	script.prepare (root) ... script.prepare (dir), script, script.cleanup (dir) ... script.cleanup (root)
//
// Note that cleanups are part of the same script, so they do not run if the
// main script fails; that is acceptable because the script runs in a temp dir.
func readMergedScriptContents(t *testing.T, dir string) string {
	mainScript := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript))

	var before, after []string
	cur := dir
	for {
		if cleanup := readIfExists(t, filepath.Join(cur, CleanupScript)); len(cleanup) > 0 {
			after = append(after, string(cleanup))
		}
		if prepare := readIfExists(t, filepath.Join(cur, PrepareScript)); len(prepare) > 0 {
			before = append(before, string(prepare))
		}

		if cur == "" || cur == "." {
			break
		}
		cur = filepath.Dir(cur)
		// Never climb out of the test tree.
		require.True(t, filepath.IsLocal(cur))
	}

	// Prepare scripts were collected innermost first; they run outermost first.
	slices.Reverse(before)

	parts := make([]string, 0, len(before)+1+len(after))
	parts = append(parts, before...)
	parts = append(parts, mainScript)
	parts = append(parts, after...)
	return strings.Join(parts, "\n")
}
// BuildCLI compiles the CLI with "go build" (run from the parent directory)
// into build/databricks under the current working directory, adding ".exe" on
// Windows. It then runs the binary once with --version as a sanity check and
// returns the absolute path of the binary. Any failure aborts the test.
func BuildCLI(t *testing.T) string {
	wd, err := os.Getwd()
	require.NoError(t, err)

	binary := "databricks"
	if runtime.GOOS == "windows" {
		binary += ".exe"
	}
	execPath := filepath.Join(wd, "build", binary)

	began := time.Now()
	buildArgs := []string{"go", "build", "-mod", "vendor", "-o", execPath}
	build := exec.Command(buildArgs[0], buildArgs[1:]...)
	build.Dir = ".."
	out, err := build.CombinedOutput()
	t.Logf("%s took %s", buildArgs, time.Since(began))
	require.NoError(t, err, "go build failed: %s: %s\n%s", buildArgs, err, out)
	if len(out) > 0 {
		t.Logf("go build output: %s: %s", buildArgs, out)
	}

	// Quick check + warm up cache:
	out, err = exec.Command(execPath, "--version").CombinedOutput()
	require.NoError(t, err, "%s --version failed: %s\n%s", execPath, err, out)
	return execPath
}
// copyFile copies the contents of the file at src into a file at dst, creating
// dst or truncating it if it already exists.
//
// It returns the first error encountered while opening src, creating dst,
// copying the data, or closing dst. The close error of the destination is
// reported because a failed close can mean the written data was not persisted.
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	// The source is only read, so its close error carries no information.
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier copy error; otherwise surface the close error.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(out, in)
	return err
}
// formatOutput returns the captured script output with a trailer describing
// how the command ended.
//
// When err is nil, out is returned unchanged. When err is, or wraps, an
// *exec.ExitError, a line "Exit code: N" is appended. For any other error a
// line "Error: <message>" is appended. The trailer is preceded and followed by
// a newline.
func formatOutput(out string, err error) string {
	if err == nil {
		return out
	}

	// errors.As also matches an ExitError that has been wrapped.
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		return out + fmt.Sprintf("\nExit code: %d\n", exitErr.ExitCode())
	}
	return out + fmt.Sprintf("\nError: %s\n", err)
}
// readIfExists returns the contents of the file at path. A file that does not
// exist yields an empty, non-nil slice; any other read error fails the test
// immediately via t.Fatalf.
func readIfExists(t *testing.T, path string) []byte {
	content, err := os.ReadFile(path)
	switch {
	case err == nil:
		return content
	case errors.Is(err, os.ErrNotExist):
		return []byte{}
	default:
		t.Fatalf("%s: %s", path, err)
		return []byte{}
	}
}
// CopyDir walks src and mirrors it into dst while classifying every visited
// entry by its path relative to src:
//
//   - entries whose name starts with "out" are recorded in outputs and are
//     not copied;
//   - all other entries are recorded in inputs; of those, names listed in
//     Scripts are not copied, directories are created in dst with the source
//     mode, and files are copied with copyFile.
//
// The first error from the walk, from computing a relative path, or from
// creating/copying an entry stops the walk and is returned.
func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
	visit := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		relPath, err := filepath.Rel(src, path)
		if err != nil {
			return err
		}

		name := info.Name()
		if strings.HasPrefix(name, "out") {
			outputs[relPath] = true
			return nil
		}
		inputs[relPath] = true

		if _, isScript := Scripts[name]; isScript {
			return nil
		}

		target := filepath.Join(dst, relPath)
		if info.IsDir() {
			return os.MkdirAll(target, info.Mode())
		}
		return copyFile(path, target)
	}

	return filepath.Walk(src, visit)
}

1
acceptance/build/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
databricks

View File

@ -1,9 +1,6 @@
bundle:
name: clusters
workspace:
host: https://acme.cloud.databricks.com/
resources:
clusters:
foo:

View File

@ -0,0 +1,33 @@
>>> $CLI bundle validate -o json -t default
{
"autoscale": {
"max_workers": 7,
"min_workers": 2
},
"cluster_name": "foo",
"custom_tags": {},
"node_type_id": "i3.xlarge",
"num_workers": 2,
"spark_conf": {
"spark.executor.memory": "2g"
},
"spark_version": "13.3.x-scala2.12"
}
>>> $CLI bundle validate -o json -t development
{
"autoscale": {
"max_workers": 3,
"min_workers": 1
},
"cluster_name": "foo-override",
"custom_tags": {},
"node_type_id": "m5.xlarge",
"num_workers": 3,
"spark_conf": {
"spark.executor.memory": "4g",
"spark.executor.memory2": "4g"
},
"spark_version": "15.2.x-scala2.12"
}

View File

@ -0,0 +1,2 @@
trace $CLI bundle validate -o json -t default | jq .resources.clusters.foo
trace $CLI bundle validate -o json -t development | jq .resources.clusters.foo

View File

@ -1,9 +1,6 @@
bundle:
name: override_job_cluster
workspace:
host: https://acme.cloud.databricks.com/
resources:
jobs:
foo:

View File

@ -0,0 +1,30 @@
>>> $CLI bundle validate -o json -t development
{
"name": "job",
"job_clusters": [
{
"job_cluster_key": "key",
"new_cluster": {
"node_type_id": "i3.xlarge",
"num_workers": 1,
"spark_version": "13.3.x-scala2.12"
}
}
]
}
>>> $CLI bundle validate -o json -t staging
{
"name": "job",
"job_clusters": [
{
"job_cluster_key": "key",
"new_cluster": {
"node_type_id": "i3.2xlarge",
"num_workers": 4,
"spark_version": "13.3.x-scala2.12"
}
}
]
}

View File

@ -0,0 +1,2 @@
trace $CLI bundle validate -o json -t development | jq '.resources.jobs.foo | {name,job_clusters}'
trace $CLI bundle validate -o json -t staging | jq '.resources.jobs.foo | {name,job_clusters}'

View File

@ -1,9 +1,6 @@
bundle:
name: override_job_tasks
workspace:
host: https://acme.cloud.databricks.com/
resources:
jobs:
foo:

View File

@ -0,0 +1,68 @@
>>> errcode $CLI bundle validate -o json -t development
Error: file ./test1.py not found
Exit code: 1
{
"name": "job",
"queue": {
"enabled": true
},
"tags": {},
"tasks": [
{
"new_cluster": {
"node_type_id": "i3.xlarge",
"num_workers": 1,
"spark_version": "13.3.x-scala2.12"
},
"spark_python_task": {
"python_file": "./test1.py"
},
"task_key": "key1"
},
{
"new_cluster": {
"spark_version": "13.3.x-scala2.12"
},
"spark_python_task": {
"python_file": "./test2.py"
},
"task_key": "key2"
}
]
}
>>> errcode $CLI bundle validate -o json -t staging
Error: file ./test1.py not found
Exit code: 1
{
"name": "job",
"queue": {
"enabled": true
},
"tags": {},
"tasks": [
{
"new_cluster": {
"spark_version": "13.3.x-scala2.12"
},
"spark_python_task": {
"python_file": "./test1.py"
},
"task_key": "key1"
},
{
"new_cluster": {
"node_type_id": "i3.2xlarge",
"num_workers": 4,
"spark_version": "13.3.x-scala2.12"
},
"spark_python_task": {
"python_file": "./test3.py"
},
"task_key": "key2"
}
]
}

View File

@ -0,0 +1,2 @@
trace errcode $CLI bundle validate -o json -t development | jq .resources.jobs.foo
trace errcode $CLI bundle validate -o json -t staging | jq .resources.jobs.foo

View File

@ -0,0 +1,13 @@
bundle:
name: merge-string-map
resources:
clusters:
my_cluster: "hello"
targets:
dev:
resources:
clusters:
my_cluster:
spark_version: "25"

View File

@ -0,0 +1,23 @@
>>> $CLI bundle validate -o json -t dev
{
"clusters": {
"my_cluster": {
"custom_tags": {},
"spark_version": "25"
}
}
}
>>> $CLI bundle validate -t dev
Warning: expected map, found string
at resources.clusters.my_cluster
in databricks.yml:6:17
Name: merge-string-map
Target: dev
Workspace:
User: tester@databricks.com
Path: /Workspace/Users/tester@databricks.com/.bundle/merge-string-map/dev
Found 1 warning

View File

@ -0,0 +1,2 @@
trace $CLI bundle validate -o json -t dev | jq .resources
trace $CLI bundle validate -t dev

View File

@ -1,9 +1,6 @@
bundle:
name: override_pipeline_cluster
workspace:
host: https://acme.cloud.databricks.com/
resources:
pipelines:
foo:

View File

@ -0,0 +1,44 @@
>>> $CLI bundle validate -o json -t development
{
"foo": {
"clusters": [
{
"label": "default",
"node_type_id": "i3.xlarge",
"num_workers": 1,
"spark_conf": {
"foo": "bar"
}
}
],
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/development/state/metadata.json"
},
"name": "job",
"permissions": []
}
}
>>> $CLI bundle validate -o json -t staging
{
"foo": {
"clusters": [
{
"label": "default",
"node_type_id": "i3.2xlarge",
"num_workers": 4,
"spark_conf": {
"foo": "bar"
}
}
],
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/override_pipeline_cluster/staging/state/metadata.json"
},
"name": "job",
"permissions": []
}
}

View File

@ -0,0 +1,2 @@
trace $CLI bundle validate -o json -t development | jq .resources.pipelines
trace $CLI bundle validate -o json -t staging | jq .resources.pipelines

View File

@ -0,0 +1,19 @@
bundle:
name: complex-transitive
variables:
catalog:
default: hive_metastore
spark_conf:
default:
"spark.databricks.sql.initial.catalog.name": ${var.catalog}
etl_cluster_config:
type: complex
default:
spark_version: 14.3.x-scala2.12
runtime_engine: PHOTON
spark_conf: ${var.spark_conf}
resources:
clusters:
my_cluster: ${var.etl_cluster_config}

View File

@ -0,0 +1,3 @@
{
"spark.databricks.sql.initial.catalog.name": "${var.catalog}"
}

View File

@ -0,0 +1,2 @@
# Currently, this incorrectly outputs variable reference instead of resolved value
$CLI bundle validate -o json | jq '.resources.clusters.my_cluster.spark_conf'

View File

@ -46,6 +46,7 @@ variables:
targets:
default:
default: true
dev:
variables:
node_type: "Standard_DS3_v3"

View File

@ -0,0 +1,124 @@
{
"resources": {
"jobs": {
"my_job": {
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/default/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
"job_clusters": [
{
"job_cluster_key": "key",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2,
"policy_id": "some-policy-id",
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.random": "true",
"spark.speculation": "true"
},
"spark_version": "13.2.x-scala2.11"
}
}
],
"permissions": [],
"queue": {
"enabled": true
},
"tags": {},
"tasks": [
{
"job_cluster_key": "key",
"libraries": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
],
"task_key": "task with spark version 13.2.x-scala2.11 and jar /path/to/jar"
}
]
}
}
},
"variables": {
"cluster": {
"default": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2,
"policy_id": "some-policy-id",
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.random": true,
"spark.speculation": true
},
"spark_version": "13.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 2,
"policy_id": "some-policy-id",
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.random": true,
"spark.speculation": true
},
"spark_version": "13.2.x-scala2.11"
}
},
"complexvar": {
"default": {
"key1": "value1",
"key2": "value2",
"key3": "value3"
},
"description": "A complex variable",
"type": "complex",
"value": {
"key1": "value1",
"key2": "value2",
"key3": "value3"
}
},
"libraries": {
"default": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
],
"description": "A libraries definition",
"type": "complex",
"value": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
]
},
"node_type": {
"default": "Standard_DS3_v2",
"value": "Standard_DS3_v2"
}
}
}

View File

@ -0,0 +1,118 @@
{
"resources": {
"jobs": {
"my_job": {
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables/dev/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
"job_clusters": [
{
"job_cluster_key": "key",
"new_cluster": {
"node_type_id": "Standard_DS3_v3",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.speculation": "false"
},
"spark_version": "14.2.x-scala2.11"
}
}
],
"permissions": [],
"queue": {
"enabled": true
},
"tags": {},
"tasks": [
{
"job_cluster_key": "key",
"libraries": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
],
"task_key": "task with spark version 14.2.x-scala2.11 and jar /path/to/jar"
}
]
}
}
},
"variables": {
"cluster": {
"default": {
"node_type_id": "Standard_DS3_v3",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v3",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
}
},
"complexvar": {
"default": {
"key1": "1",
"key2": "2",
"key3": "3"
},
"description": "A complex variable",
"type": "complex",
"value": {
"key1": "1",
"key2": "2",
"key3": "3"
}
},
"libraries": {
"default": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
],
"description": "A libraries definition",
"type": "complex",
"value": [
{
"jar": "/path/to/jar"
},
{
"egg": "/path/to/egg"
},
{
"whl": "/path/to/whl"
}
]
},
"node_type": {
"default": "Standard_DS3_v3",
"value": "Standard_DS3_v3"
}
}
}

View File

@ -0,0 +1,4 @@
$CLI bundle validate -o json | jq '{resources,variables}' > out.default.json
# spark.random and policy_id should be empty in this target:
$CLI bundle validate -o json -t dev | jq '{resources,variables}' > out.dev.json

View File

@ -0,0 +1,159 @@
{
"resources": {
"jobs": {
"my_job": {
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/complex-variables-multiple-files/dev/state/metadata.json"
},
"edit_mode": "UI_LOCKED",
"format": "MULTI_TASK",
"job_clusters": [
{
"job_cluster_key": "key1",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.speculation": "false"
},
"spark_version": "14.2.x-scala2.11"
}
},
{
"job_cluster_key": "key2",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.speculation": "false"
},
"spark_version": "14.2.x-scala2.11"
}
},
{
"job_cluster_key": "key3",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.speculation": "false"
},
"spark_version": "14.2.x-scala2.11"
}
},
{
"job_cluster_key": "key4",
"new_cluster": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": "false",
"spark.speculation": "false"
},
"spark_version": "14.2.x-scala2.11"
}
}
],
"permissions": [],
"queue": {
"enabled": true
},
"tags": {}
}
}
},
"variables": {
"cluster1": {
"default": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
}
},
"cluster2": {
"default": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
}
},
"cluster3": {
"default": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
}
},
"cluster4": {
"default": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
},
"description": "A cluster definition",
"type": "complex",
"value": {
"node_type_id": "Standard_DS3_v2",
"num_workers": 4,
"spark_conf": {
"spark.databricks.delta.retentionDurationCheck.enabled": false,
"spark.speculation": false
},
"spark_version": "14.2.x-scala2.11"
}
}
}
}

View File

@ -0,0 +1 @@
$CLI bundle validate -t dev -o json | jq '{resources, variables}'

View File

@ -0,0 +1,11 @@
Error: no value assigned to required variable a. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_a environment variable
Name: empty${var.a}
Target: default
Workspace:
User: tester@databricks.com
Path: /Workspace/Users/tester@databricks.com/.bundle/empty${var.a}/default
Found 1 error
Exit code: 1

View File

@ -0,0 +1 @@
$CLI bundle validate

View File

@ -0,0 +1,40 @@
>>> $CLI bundle validate -t env-with-single-variable-override -o json
"default-a dev-b"
>>> $CLI bundle validate -t env-with-two-variable-overrides -o json
"prod-a prod-b"
>>> BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json
"prod-a env-var-b"
>>> errcode $CLI bundle validate -t env-missing-a-required-variable-assignment
Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable
Name: test bundle
Target: env-missing-a-required-variable-assignment
Workspace:
User: tester@databricks.com
Path: /Workspace/Users/tester@databricks.com/.bundle/test bundle/env-missing-a-required-variable-assignment
Found 1 error
Exit code: 1
>>> errcode $CLI bundle validate -t env-using-an-undefined-variable
Error: variable c is not defined but is assigned a value
Name: test bundle
Found 1 error
Exit code: 1
>>> $CLI bundle validate -t env-overrides-lookup -o json
{
"a": "default-a",
"b": "prod-b",
"d": "4321",
"e": "1234",
"f": "9876"
}

View File

@ -0,0 +1,6 @@
trace $CLI bundle validate -t env-with-single-variable-override -o json | jq .workspace.profile
trace $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile
trace BUNDLE_VAR_b=env-var-b $CLI bundle validate -t env-with-two-variable-overrides -o json | jq .workspace.profile
trace errcode $CLI bundle validate -t env-missing-a-required-variable-assignment
trace errcode $CLI bundle validate -t env-using-an-undefined-variable
trace $CLI bundle validate -t env-overrides-lookup -o json | jq '.variables | map_values(.value)'

View File

@ -0,0 +1,16 @@
>>> BUNDLE_VAR_b=def $CLI bundle validate -o json
"abc def"
>>> errcode $CLI bundle validate
Error: no value assigned to required variable b. Assignment can be done through the "--var" flag or by setting the BUNDLE_VAR_b environment variable
Name: ${var.a} ${var.b}
Target: default
Workspace:
User: tester@databricks.com
Path: /Workspace/Users/tester@databricks.com/.bundle/${var.a} ${var.b}/default
Found 1 error
Exit code: 1

View File

@ -0,0 +1,2 @@
trace BUNDLE_VAR_b=def $CLI bundle validate -o json | jq .bundle.name
trace errcode $CLI bundle validate

View File

@ -0,0 +1,84 @@
>>> $CLI bundle validate -o json -t use-default-variable-values
{
"pipelines": {
"my_pipeline": {
"clusters": [
{
"label": "default",
"num_workers": 42
}
],
"continuous": true,
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/use-default-variable-values/state/metadata.json"
},
"name": "a_string",
"permissions": []
}
}
}
>>> $CLI bundle validate -o json -t override-string-variable
{
"pipelines": {
"my_pipeline": {
"clusters": [
{
"label": "default",
"num_workers": 42
}
],
"continuous": true,
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-string-variable/state/metadata.json"
},
"name": "overridden_string",
"permissions": []
}
}
}
>>> $CLI bundle validate -o json -t override-int-variable
{
"pipelines": {
"my_pipeline": {
"clusters": [
{
"label": "default",
"num_workers": 43
}
],
"continuous": true,
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-int-variable/state/metadata.json"
},
"name": "a_string",
"permissions": []
}
}
}
>>> $CLI bundle validate -o json -t override-both-bool-and-string-variables
{
"pipelines": {
"my_pipeline": {
"clusters": [
{
"label": "default",
"num_workers": 42
}
],
"continuous": false,
"deployment": {
"kind": "BUNDLE",
"metadata_file_path": "/Workspace/Users/tester@databricks.com/.bundle/foobar/override-both-bool-and-string-variables/state/metadata.json"
},
"name": "overridden_string",
"permissions": []
}
}
}

View File

@ -0,0 +1,4 @@
trace $CLI bundle validate -o json -t use-default-variable-values | jq .resources
trace $CLI bundle validate -o json -t override-string-variable | jq .resources
trace $CLI bundle validate -o json -t override-int-variable | jq .resources
trace $CLI bundle validate -o json -t override-both-bool-and-string-variables | jq .resources

View File

@ -0,0 +1,4 @@
{
"a": "foo",
"b": "bar"
}

View File

@ -0,0 +1 @@
BUNDLE_VAR_a=foo BUNDLE_VAR_b=bar $CLI bundle validate -o json | jq '.variables | map_values(.value)'

143
acceptance/help/output.txt Normal file
View File

@ -0,0 +1,143 @@
Databricks CLI
Usage:
databricks [command]
Databricks Workspace
fs Filesystem related commands
git-credentials Registers personal access token for Databricks to do operations on behalf of the user.
repos The Repos API allows users to manage their git repos.
secrets The Secrets API allows you to manage secrets, secret scopes, and access permissions.
workspace The Workspace API allows you to list, import, export, and delete notebooks and folders.
Compute
cluster-policies You can use cluster policies to control users' ability to configure clusters based on a set of rules.
clusters The Clusters API allows you to create, start, edit, list, terminate, and delete clusters.
global-init-scripts The Global Init Scripts API enables Workspace administrators to configure global initialization scripts for their workspace.
instance-pools Instance Pools API are used to create, edit, delete and list instance pools by using ready-to-use cloud instances which reduces a cluster start and auto-scaling times.
instance-profiles The Instance Profiles API allows admins to add, list, and remove instance profiles that users can launch clusters with.
libraries The Libraries API allows you to install and uninstall libraries and get the status of libraries on a cluster.
policy-compliance-for-clusters The policy compliance APIs allow you to view and manage the policy compliance status of clusters in your workspace.
policy-families View available policy families.
Workflows
jobs The Jobs API allows you to create, edit, and delete jobs.
policy-compliance-for-jobs The compliance APIs allow you to view and manage the policy compliance status of jobs in your workspace.
Delta Live Tables
pipelines The Delta Live Tables API allows you to create, edit, delete, start, and view details about pipelines.
Machine Learning
experiments Experiments are the primary unit of organization in MLflow; all MLflow runs belong to an experiment.
model-registry Note: This API reference documents APIs for the Workspace Model Registry.
Real-time Serving
serving-endpoints The Serving Endpoints API allows you to create, update, and delete model serving endpoints.
Identity and Access Management
current-user This API allows retrieving information about currently authenticated user or service principal.
groups Groups simplify identity management, making it easier to assign access to Databricks workspace, data, and other securable objects.
permissions Permissions API are used to create read, write, edit, update and manage access for various users on different objects and endpoints.
service-principals Identities for use with jobs, automated tools, and systems such as scripts, apps, and CI/CD platforms.
users User identities recognized by Databricks and represented by email addresses.
Databricks SQL
alerts The alerts API can be used to perform CRUD operations on alerts.
alerts-legacy The alerts API can be used to perform CRUD operations on alerts.
dashboards In general, there is little need to modify dashboards using the API.
data-sources This API is provided to assist you in making new query objects.
queries The queries API can be used to perform CRUD operations on queries.
queries-legacy These endpoints are used for CRUD operations on query definitions.
query-history A service responsible for storing and retrieving the list of queries run against SQL endpoints and serverless compute.
warehouses A SQL warehouse is a compute resource that lets you run SQL commands on data objects within Databricks SQL.
Unity Catalog
artifact-allowlists In Databricks Runtime 13.3 and above, you can add libraries and init scripts to the allowlist in UC so that users can leverage these artifacts on compute configured with shared access mode.
catalogs A catalog is the first layer of Unity Catalogs three-level namespace.
connections Connections allow for creating a connection to an external data source.
credentials A credential represents an authentication and authorization mechanism for accessing services on your cloud tenant.
external-locations An external location is an object that combines a cloud storage path with a storage credential that authorizes access to the cloud storage path.
functions Functions implement User-Defined Functions (UDFs) in Unity Catalog.
grants In Unity Catalog, data is secure by default.
metastores A metastore is the top-level container of objects in Unity Catalog.
model-versions Databricks provides a hosted version of MLflow Model Registry in Unity Catalog.
online-tables Online tables provide lower latency and higher QPS access to data from Delta tables.
quality-monitors A monitor computes and monitors data or model quality metrics for a table over time.
registered-models Databricks provides a hosted version of MLflow Model Registry in Unity Catalog.
resource-quotas Unity Catalog enforces resource quotas on all securable objects, which limits the number of resources that can be created.
schemas A schema (also called a database) is the second layer of Unity Catalogs three-level namespace.
storage-credentials A storage credential represents an authentication and authorization mechanism for accessing data stored on your cloud tenant.
system-schemas A system schema is a schema that lives within the system catalog.
table-constraints Primary key and foreign key constraints encode relationships between fields in tables.
tables A table resides in the third layer of Unity Catalogs three-level namespace.
temporary-table-credentials Temporary Table Credentials refer to short-lived, downscoped credentials used to access cloud storage locationswhere table data is stored in Databricks.
volumes Volumes are a Unity Catalog (UC) capability for accessing, storing, governing, organizing and processing files.
workspace-bindings A securable in Databricks can be configured as __OPEN__ or __ISOLATED__.
Delta Sharing
providers A data provider is an object representing the organization in the real world who shares the data.
recipient-activation The Recipient Activation API is only applicable in the open sharing model where the recipient object has the authentication type of TOKEN.
recipients A recipient is an object you create using :method:recipients/create to represent an organization which you want to allow access shares.
shares A share is a container instantiated with :method:shares/create.
Settings
ip-access-lists IP Access List enables admins to configure IP access lists.
notification-destinations The notification destinations API lets you programmatically manage a workspace's notification destinations.
settings Workspace Settings API allows users to manage settings at the workspace level.
token-management Enables administrators to get all tokens and delete tokens for other users.
tokens The Token API allows you to create, list, and revoke tokens that can be used to authenticate and access Databricks REST APIs.
workspace-conf This API allows updating known workspace settings for advanced users.
Developer Tools
bundle Databricks Asset Bundles let you express data/AI/analytics projects as code.
sync Synchronize a local directory to a workspace directory
Vector Search
vector-search-endpoints **Endpoint**: Represents the compute resources to host vector search indexes.
vector-search-indexes **Index**: An efficient representation of your embedding vectors that supports real-time and efficient approximate nearest neighbor (ANN) search queries.
Dashboards
lakeview These APIs provide specific management operations for Lakeview dashboards.
Marketplace
consumer-fulfillments Fulfillments are entities that allow consumers to preview installations.
consumer-installations Installations are entities that allow consumers to interact with Databricks Marketplace listings.
consumer-listings Listings are the core entities in the Marketplace.
consumer-personalization-requests Personalization Requests allow customers to interact with the individualized Marketplace listing flow.
consumer-providers Providers are the entities that publish listings to the Marketplace.
provider-exchange-filters Marketplace exchanges filters curate which groups can access an exchange.
provider-exchanges Marketplace exchanges allow providers to share their listings with a curated set of customers.
provider-files Marketplace offers a set of file APIs for various purposes such as preview notebooks and provider icons.
provider-listings Listings are the core entities in the Marketplace.
provider-personalization-requests Personalization requests are an alternate to instantly available listings.
provider-provider-analytics-dashboards Manage templated analytics solution for providers.
provider-providers Providers are entities that manage assets in Marketplace.
Apps
apps Apps run directly on a customers Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
apps Apps run directly on a customers Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
Clean Rooms
clean-room-assets Clean room assets are data and code objects — Tables, volumes, and notebooks that are shared with the clean room.
clean-room-task-runs Clean room task runs are the executions of notebooks in a clean room.
clean-rooms A clean room uses Delta Sharing and serverless compute to provide a secure and privacy-protecting environment where multiple parties can work together on sensitive enterprise data without direct access to each others data.
Additional Commands:
account Databricks Account Commands
api Perform Databricks API call
auth Authentication related commands
completion Generate the autocompletion script for the specified shell
configure Configure authentication
help Help about any command
labs Manage Databricks Labs installations
version Retrieve information about the current version of this CLI
Flags:
--debug enable debug logging
-h, --help help for databricks
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
-v, --version version for databricks
Use "databricks [command] --help" for more information about a command.

1
acceptance/help/script Normal file
View File

@ -0,0 +1 @@
# Run the CLI with no arguments.
$CLI

View File

@ -0,0 +1 @@
# Remove .databricks and .gitignore from the current directory; -f keeps rm
# quiet when they do not exist.
rm -fr .databricks .gitignore

36
acceptance/script.prepare Normal file
View File

@ -0,0 +1,36 @@
# Prevent CLI from downloading terraform in each test:
export DATABRICKS_TF_EXEC_PATH=/tmp/
# errcode runs the given command without letting a non-zero exit status abort
# the script, then reports that status on stderr as "Exit code: N".
#
# Arguments: the command and its arguments, passed through verbatim.
errcode() {
    # Remember whether 'set -e' is active so that it is only restored
    # afterwards, never switched on for a caller that had it off.
    local errexit_was_set=0
    if [[ $- == *e* ]]; then
        errexit_was_set=1
    fi

    # Temporarily disable 'set -e' to prevent the script from exiting on error
    set +e

    # Execute the provided command with all arguments
    "$@"
    local exit_code=$?

    # Re-enable 'set -e' only if it was previously set
    if [[ $errexit_was_set -eq 1 ]]; then
        set -e
    fi

    # Pass the status as an argument instead of splicing it into the format string.
    >&2 printf "\nExit code: %s\n" "$exit_code"
}
# trace echoes the command line to stderr, prefixed with ">>> ", and then runs
# it. Leading VAR=value words are treated as environment assignments that
# apply to that one command only.
trace() {
    >&2 printf "\n>>> %s\n" "$*"

    # No leading assignment: run the command directly in the current shell.
    if [[ "$1" != *"="* ]]; then
        "$@"
        return $?
    fi

    # Peel off every leading word that contains '='.
    local assignments=()
    while [[ "$1" == *"="* ]]; do
        assignments+=("$1")
        shift
    done

    # Export inside a subshell so the variables do not outlive the command.
    (
        export "${assignments[@]}"
        "$@"
    )
    return $?
}

129
acceptance/server_test.go Normal file
View File

@ -0,0 +1,129 @@
package acceptance_test
import (
"encoding/json"
"net"
"net/http"
"net/http/httptest"
"testing"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/workspace"
)
// TestServer is an in-process HTTP server whose routes are registered through
// Handle. It embeds the underlying httptest.Server, so URL and Close are
// available directly.
type TestServer struct {
	*httptest.Server

	// Mux routes incoming requests to the handlers registered via Handle.
	Mux *http.ServeMux

	// Port is the TCP port the server listens on.
	Port int
}

// HandlerFunc produces the response payload for a request. A string is sent
// to the client verbatim; any other value is JSON-encoded. A non-nil error is
// reported to the client as a 500 response carrying the error text.
type HandlerFunc func(r *http.Request) (any, error)

// NewTestServer starts a server with an empty mux. The caller is responsible
// for closing it.
func NewTestServer() *TestServer {
	mux := http.NewServeMux()
	server := httptest.NewServer(mux)
	port := server.Listener.Addr().(*net.TCPAddr).Port

	return &TestServer{
		Server: server,
		Mux:    mux,
		Port:   port,
	}
}

// Handle registers handler for pattern on the server's mux and takes care of
// encoding whatever the handler returns (see HandlerFunc).
func (s *TestServer) Handle(pattern string, handler HandlerFunc) {
	s.Mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
		resp, err := handler(r)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		var respBytes []byte
		if respString, ok := resp.(string); ok {
			respBytes = []byte(respString)
		} else {
			respBytes, err = json.MarshalIndent(resp, "", " ")
			if err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
				return
			}
		}

		// Set the content type only once the payload is known to be valid.
		w.Header().Set("Content-Type", "application/json")

		// The first Write commits the status line and headers, so a failure
		// here can no longer be turned into an error response: calling
		// http.Error would only trigger a superfluous WriteHeader and a second
		// write on the same broken connection. The error is dropped on purpose.
		_, _ = w.Write(respBytes)
	})
}

// StartServer starts a new TestServer and closes it when the test finishes.
func StartServer(t *testing.T) *TestServer {
	server := NewTestServer()
	t.Cleanup(func() {
		server.Close()
	})
	return server
}
// AddHandlers registers fixed responses on server for the cluster policy,
// instance pool, cluster, current-user and workspace get-status endpoints.
func AddHandlers(server *TestServer) {
	clusterPolicies := compute.ListPoliciesResponse{
		Policies: []compute.Policy{
			{PolicyId: "5678", Name: "wrong-cluster-policy"},
			{PolicyId: "9876", Name: "some-test-cluster-policy"},
		},
	}
	server.Handle("/api/2.0/policies/clusters/list", func(*http.Request) (any, error) {
		return clusterPolicies, nil
	})

	instancePools := compute.ListInstancePools{
		InstancePools: []compute.InstancePoolAndStats{
			{InstancePoolName: "some-test-instance-pool", InstancePoolId: "1234"},
		},
	}
	server.Handle("/api/2.0/instance-pools/list", func(*http.Request) (any, error) {
		return instancePools, nil
	})

	clusters := compute.ListClustersResponse{
		Clusters: []compute.ClusterDetails{
			{ClusterName: "some-test-cluster", ClusterId: "4321"},
			{ClusterName: "some-other-cluster", ClusterId: "9876"},
		},
	}
	server.Handle("/api/2.1/clusters/list", func(*http.Request) (any, error) {
		return clusters, nil
	})

	currentUser := iam.User{UserName: "tester@databricks.com"}
	server.Handle("/api/2.0/preview/scim/v2/Me", func(*http.Request) (any, error) {
		return currentUser, nil
	})

	// Every path is reported as the same directory object.
	directory := workspace.ObjectInfo{
		ObjectId:   1001,
		ObjectType: "DIRECTORY",
		Path:       "",
		ResourceId: "1001",
	}
	server.Handle("/api/2.0/workspace/get-status", func(*http.Request) (any, error) {
		return directory, nil
	})
}

View File

@ -1,36 +0,0 @@
package config_tests
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestClusters checks the bundle name and the settings of cluster "foo" as
// loaded from ./clusters.
func TestClusters(t *testing.T) {
	b := load(t, "./clusters")
	assert.Equal(t, "clusters", b.Config.Bundle.Name)

	foo := b.Config.Resources.Clusters["foo"]
	assert.Equal(t, "foo", foo.ClusterName)
	assert.Equal(t, "13.3.x-scala2.12", foo.SparkVersion)
	assert.Equal(t, "i3.xlarge", foo.NodeTypeId)
	assert.Equal(t, 2, foo.NumWorkers)
	assert.Equal(t, "2g", foo.SparkConf["spark.executor.memory"])

	scaling := foo.Autoscale
	assert.Equal(t, 2, scaling.MinWorkers)
	assert.Equal(t, 7, scaling.MaxWorkers)
}
// TestClustersOverride checks the settings of cluster "foo" when ./clusters is
// loaded for the "development" target.
func TestClustersOverride(t *testing.T) {
	b := loadTarget(t, "./clusters", "development")
	assert.Equal(t, "clusters", b.Config.Bundle.Name)

	foo := b.Config.Resources.Clusters["foo"]
	assert.Equal(t, "foo-override", foo.ClusterName)
	assert.Equal(t, "15.2.x-scala2.12", foo.SparkVersion)
	assert.Equal(t, "m5.xlarge", foo.NodeTypeId)
	assert.Equal(t, 3, foo.NumWorkers)

	// Both memory keys are expected to carry the same value.
	assert.Equal(t, "4g", foo.SparkConf["spark.executor.memory"])
	assert.Equal(t, "4g", foo.SparkConf["spark.executor.memory2"])

	scaling := foo.Autoscale
	assert.Equal(t, 1, scaling.MinWorkers)
	assert.Equal(t, 3, scaling.MaxWorkers)
}

View File

@ -1,108 +0,0 @@
package config_tests
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/stretchr/testify/require"
)
// TestComplexVariables loads the "default" target of variables/complex,
// resolves variable references, and checks the resulting job cluster, task
// libraries and task key of job "my_job".
func TestComplexVariables(t *testing.T) {
	b, diags := loadTargetWithDiags("variables/complex", "default")
	require.Empty(t, diags)

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferencesInComplexVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	diags = bundle.Apply(context.Background(), b, resolve)
	require.NoError(t, diags.Error())

	job := b.Config.Resources.Jobs["my_job"]

	newCluster := job.JobClusters[0].NewCluster
	require.Equal(t, "13.2.x-scala2.11", newCluster.SparkVersion)
	require.Equal(t, "Standard_DS3_v2", newCluster.NodeTypeId)
	require.Equal(t, "some-policy-id", newCluster.PolicyId)
	require.Equal(t, 2, newCluster.NumWorkers)
	require.Equal(t, "true", newCluster.SparkConf["spark.speculation"])
	require.Equal(t, "true", newCluster.SparkConf["spark.random"])

	task := job.Tasks[0]
	require.Len(t, task.Libraries, 3)
	for _, want := range []compute.Library{
		{Jar: "/path/to/jar"},
		{Egg: "/path/to/egg"},
		{Whl: "/path/to/whl"},
	} {
		require.Contains(t, task.Libraries, want)
	}

	require.Equal(t, "task with spark version 13.2.x-scala2.11 and jar /path/to/jar", task.TaskKey)
}
// TestComplexVariablesOverride loads the "dev" target of variables/complex and
// checks that the complex variable is replaced as a whole rather than merged.
func TestComplexVariablesOverride(t *testing.T) {
	b, diags := loadTargetWithDiags("variables/complex", "dev")
	require.Empty(t, diags)

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferencesInComplexVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	diags = bundle.Apply(context.Background(), b, resolve)
	require.NoError(t, diags.Error())

	newCluster := b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster
	require.Equal(t, "14.2.x-scala2.11", newCluster.SparkVersion)
	require.Equal(t, "Standard_DS3_v3", newCluster.NodeTypeId)
	require.Equal(t, 4, newCluster.NumWorkers)
	require.Equal(t, "false", newCluster.SparkConf["spark.speculation"])

	// The variable must be overridden, not merged or extended: these
	// properties are set in the default target but not in the override
	// target, so they have to come out empty.
	require.Equal(t, "", newCluster.SparkConf["spark.random"])
	require.Equal(t, "", newCluster.PolicyId)
}
// TestComplexVariablesOverrideWithMultipleFiles loads the "dev" target of
// variables/complex_multiple_files and checks that every job cluster of
// "my_job" ends up with the same resolved settings.
func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) {
	b, diags := loadTargetWithDiags("variables/complex_multiple_files", "dev")
	require.Empty(t, diags)

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferencesInComplexVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	diags = bundle.Apply(context.Background(), b, resolve)
	require.NoError(t, diags.Error())

	for _, jc := range b.Config.Resources.Jobs["my_job"].JobClusters {
		// The cluster key is attached to each failure message to identify the culprit.
		key := jc.JobClusterKey
		require.Equalf(t, "14.2.x-scala2.11", jc.NewCluster.SparkVersion, "cluster: %v", key)
		require.Equalf(t, "Standard_DS3_v2", jc.NewCluster.NodeTypeId, "cluster: %v", key)
		require.Equalf(t, 4, jc.NewCluster.NumWorkers, "cluster: %v", key)
		require.Equalf(t, "false", jc.NewCluster.SparkConf["spark.speculation"], "cluster: %v", key)
	}
}
// TestComplexVariablesOverrideWithFullSyntax loads the "dev" target of
// variables/complex and checks the resolved value of variable "complexvar".
func TestComplexVariablesOverrideWithFullSyntax(t *testing.T) {
	b, diags := loadTargetWithDiags("variables/complex", "dev")
	require.Empty(t, diags)

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferencesInComplexVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	diags = bundle.Apply(context.Background(), b, resolve)
	require.NoError(t, diags.Error())
	require.Empty(t, diags)

	want := map[string]any{"key1": "1", "key2": "2", "key3": "3"}
	got := b.Config.Variables["complexvar"].Value
	require.Equal(t, want, got)
}

View File

@ -1,29 +0,0 @@
package config_tests
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestOverrideJobClusterDev checks the single job cluster of job "foo" in the
// "development" target of ./override_job_cluster.
func TestOverrideJobClusterDev(t *testing.T) {
	b := loadTarget(t, "./override_job_cluster", "development")

	job := b.Config.Resources.Jobs["foo"]
	assert.Equal(t, "job", job.Name)
	assert.Len(t, job.JobClusters, 1)

	spec := job.JobClusters[0].NewCluster
	assert.Equal(t, "13.3.x-scala2.12", spec.SparkVersion)
	assert.Equal(t, "i3.xlarge", spec.NodeTypeId)
	assert.Equal(t, 1, spec.NumWorkers)
}
// TestOverrideJobClusterStaging checks the single job cluster of job "foo" in
// the "staging" target of ./override_job_cluster.
func TestOverrideJobClusterStaging(t *testing.T) {
	b := loadTarget(t, "./override_job_cluster", "staging")

	job := b.Config.Resources.Jobs["foo"]
	assert.Equal(t, "job", job.Name)
	assert.Len(t, job.JobClusters, 1)

	spec := job.JobClusters[0].NewCluster
	assert.Equal(t, "13.3.x-scala2.12", spec.SparkVersion)
	assert.Equal(t, "i3.2xlarge", spec.NodeTypeId)
	assert.Equal(t, 4, spec.NumWorkers)
}

View File

@ -1,39 +0,0 @@
package config_tests
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestOverrideTasksDev checks both tasks of job "foo" in the "development"
// target of ./override_job_tasks.
func TestOverrideTasksDev(t *testing.T) {
	b := loadTarget(t, "./override_job_tasks", "development")

	job := b.Config.Resources.Jobs["foo"]
	assert.Equal(t, "job", job.Name)
	assert.Len(t, job.Tasks, 2)

	first := job.Tasks[0]
	assert.Equal(t, "key1", first.TaskKey)
	assert.Equal(t, "i3.xlarge", first.NewCluster.NodeTypeId)
	assert.Equal(t, 1, first.NewCluster.NumWorkers)
	assert.Equal(t, "./test1.py", first.SparkPythonTask.PythonFile)

	second := job.Tasks[1]
	assert.Equal(t, "key2", second.TaskKey)
	assert.Equal(t, "13.3.x-scala2.12", second.NewCluster.SparkVersion)
	assert.Equal(t, "./test2.py", second.SparkPythonTask.PythonFile)
}
// TestOverrideTasksStaging checks both tasks of job "foo" in the "staging"
// target of ./override_job_tasks.
func TestOverrideTasksStaging(t *testing.T) {
	b := loadTarget(t, "./override_job_tasks", "staging")

	job := b.Config.Resources.Jobs["foo"]
	assert.Equal(t, "job", job.Name)
	assert.Len(t, job.Tasks, 2)

	first := job.Tasks[0]
	assert.Equal(t, "key1", first.TaskKey)
	assert.Equal(t, "13.3.x-scala2.12", first.NewCluster.SparkVersion)
	assert.Equal(t, "./test1.py", first.SparkPythonTask.PythonFile)

	second := job.Tasks[1]
	assert.Equal(t, "key2", second.TaskKey)
	assert.Equal(t, "i3.2xlarge", second.NewCluster.NodeTypeId)
	assert.Equal(t, 4, second.NewCluster.NumWorkers)
	assert.Equal(t, "./test3.py", second.SparkPythonTask.PythonFile)
}

View File

@ -1,29 +0,0 @@
package config_tests
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestOverridePipelineClusterDev checks the single cluster of pipeline "foo"
// in the "development" target of ./override_pipeline_cluster.
func TestOverridePipelineClusterDev(t *testing.T) {
	b := loadTarget(t, "./override_pipeline_cluster", "development")

	pipeline := b.Config.Resources.Pipelines["foo"]
	assert.Equal(t, "job", pipeline.Name)
	assert.Len(t, pipeline.Clusters, 1)

	cluster := pipeline.Clusters[0]
	assert.Equal(t, map[string]string{"foo": "bar"}, cluster.SparkConf)
	assert.Equal(t, "i3.xlarge", cluster.NodeTypeId)
	assert.Equal(t, 1, cluster.NumWorkers)
}
// TestOverridePipelineClusterStaging checks the single cluster of pipeline
// "foo" in the "staging" target of ./override_pipeline_cluster.
func TestOverridePipelineClusterStaging(t *testing.T) {
	b := loadTarget(t, "./override_pipeline_cluster", "staging")

	pipeline := b.Config.Resources.Pipelines["foo"]
	assert.Equal(t, "job", pipeline.Name)
	assert.Len(t, pipeline.Clusters, 1)

	cluster := pipeline.Clusters[0]
	assert.Equal(t, map[string]string{"foo": "bar"}, cluster.SparkConf)
	assert.Equal(t, "i3.2xlarge", cluster.NodeTypeId)
	assert.Equal(t, 4, cluster.NumWorkers)
}

View File

@ -1,206 +0,0 @@
package config_tests
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/databricks-sdk-go/experimental/mocks"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)
// TestVariables sets BUNDLE_VAR_b in the environment and checks that the
// bundle name of ./variables/vanilla resolves to "abc def".
func TestVariables(t *testing.T) {
	t.Setenv("BUNDLE_VAR_b", "def")
	b := load(t, "./variables/vanilla")

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, resolve).Error()
	require.NoError(t, err)

	assert.Equal(t, "abc def", b.Config.Bundle.Name)
}
// TestVariablesLoadingFailsWhenRequiredVariableIsNotSpecified checks that
// resolving ./variables/vanilla without a value for variable b fails with a
// message explaining how to assign one.
func TestVariablesLoadingFailsWhenRequiredVariableIsNotSpecified(t *testing.T) {
	b := load(t, "./variables/vanilla")

	resolve := bundle.Seq(
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, resolve).Error()

	assert.ErrorContains(t, err, "no value assigned to required variable b. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_b environment variable")
}
// TestVariablesTargetsBlockOverride selects the target
// "env-with-single-variable-override" and checks the resolved workspace
// profile.
func TestVariablesTargetsBlockOverride(t *testing.T) {
	b := load(t, "./variables/env_overrides")

	steps := bundle.Seq(
		mutator.SelectTarget("env-with-single-variable-override"),
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()
	require.NoError(t, err)

	assert.Equal(t, "default-a dev-b", b.Config.Workspace.Profile)
}
// TestVariablesTargetsBlockOverrideForMultipleVariables selects the target
// "env-with-two-variable-overrides" and checks the resolved workspace profile.
func TestVariablesTargetsBlockOverrideForMultipleVariables(t *testing.T) {
	b := load(t, "./variables/env_overrides")

	steps := bundle.Seq(
		mutator.SelectTarget("env-with-two-variable-overrides"),
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()
	require.NoError(t, err)

	assert.Equal(t, "prod-a prod-b", b.Config.Workspace.Profile)
}
// TestVariablesTargetsBlockOverrideWithProcessEnvVars sets BUNDLE_VAR_b in the
// environment, selects "env-with-two-variable-overrides" and checks that the
// resolved workspace profile carries the environment value for b.
func TestVariablesTargetsBlockOverrideWithProcessEnvVars(t *testing.T) {
	t.Setenv("BUNDLE_VAR_b", "env-var-b")
	b := load(t, "./variables/env_overrides")

	steps := bundle.Seq(
		mutator.SelectTarget("env-with-two-variable-overrides"),
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()
	require.NoError(t, err)

	assert.Equal(t, "prod-a env-var-b", b.Config.Workspace.Profile)
}
// TestVariablesTargetsBlockOverrideWithMissingVariables selects the target
// "env-missing-a-required-variable-assignment" and expects the missing-value
// error for variable b.
func TestVariablesTargetsBlockOverrideWithMissingVariables(t *testing.T) {
	b := load(t, "./variables/env_overrides")

	steps := bundle.Seq(
		mutator.SelectTarget("env-missing-a-required-variable-assignment"),
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()

	assert.ErrorContains(t, err, "no value assigned to required variable b. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_b environment variable")
}
// TestVariablesTargetsBlockOverrideWithUndefinedVariables selects the target
// "env-using-an-undefined-variable" and expects an error about variable c
// being assigned without being defined.
func TestVariablesTargetsBlockOverrideWithUndefinedVariables(t *testing.T) {
	b := load(t, "./variables/env_overrides")

	steps := bundle.Seq(
		mutator.SelectTarget("env-using-an-undefined-variable"),
		mutator.SetVariables(),
		mutator.ResolveVariableReferences("variables"),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()

	assert.ErrorContains(t, err, "variable c is not defined but is assigned a value")
}
// TestVariablesWithoutDefinition sets BUNDLE_VAR_a and BUNDLE_VAR_b in the
// environment and checks that variables a and b of
// ./variables/without_definition pick up those values.
func TestVariablesWithoutDefinition(t *testing.T) {
	t.Setenv("BUNDLE_VAR_a", "foo")
	t.Setenv("BUNDLE_VAR_b", "bar")

	b := load(t, "./variables/without_definition")
	err := bundle.Apply(context.Background(), b, mutator.SetVariables()).Error()
	require.NoError(t, err)

	vars := b.Config.Variables
	require.True(t, vars["a"].HasValue())
	require.True(t, vars["b"].HasValue())
	assert.Equal(t, "foo", vars["a"].Value)
	assert.Equal(t, "bar", vars["b"].Value)
}
// TestVariablesWithTargetLookupOverrides selects the target
// "env-overrides-lookup" with a mocked workspace client and checks the IDs
// that end up in variables d, e and f.
func TestVariablesWithTargetLookupOverrides(t *testing.T) {
	b := load(t, "./variables/env_overrides")

	w := mocks.NewMockWorkspaceClient(t)
	b.SetWorkpaceClient(w.WorkspaceClient)

	// Instance pool lookup by name.
	w.GetMockInstancePoolsAPI().EXPECT().
		GetByInstancePoolName(mock.Anything, "some-test-instance-pool").
		Return(&compute.InstancePoolAndStats{InstancePoolId: "1234"}, nil)

	// Cluster listing, filtered to API- and UI-created clusters.
	listRequest := compute.ListClustersRequest{
		FilterBy: &compute.ListClustersFilterBy{
			ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi},
		},
	}
	listedClusters := []compute.ClusterDetails{
		{ClusterId: "4321", ClusterName: "some-test-cluster"},
		{ClusterId: "9876", ClusterName: "some-other-cluster"},
	}
	w.GetMockClustersAPI().EXPECT().
		ListAll(mock.Anything, listRequest).
		Return(listedClusters, nil)

	// Cluster policy lookup by name.
	w.GetMockClusterPoliciesAPI().EXPECT().
		GetByName(mock.Anything, "some-test-cluster-policy").
		Return(&compute.Policy{PolicyId: "9876"}, nil)

	steps := bundle.Seq(
		mutator.SelectTarget("env-overrides-lookup"),
		mutator.SetVariables(),
		mutator.ResolveResourceReferences(),
	)
	err := bundle.Apply(context.Background(), b, steps).Error()
	require.NoError(t, err)

	vars := b.Config.Variables
	assert.Equal(t, "4321", vars["d"].Value)
	assert.Equal(t, "1234", vars["e"].Value)
	assert.Equal(t, "9876", vars["f"].Value)
}
// TestVariableTargetOverrides runs one subtest per target of
// ./variables/variable_overrides_in_target and checks the name, continuous
// flag and worker count of pipeline "my_pipeline" after variable resolution.
func TestVariableTargetOverrides(t *testing.T) {
	type expectation struct {
		targetName         string
		pipelineName       string
		pipelineContinuous bool
		pipelineNumWorkers int
	}

	cases := []expectation{
		{
			targetName:         "use-default-variable-values",
			pipelineName:       "a_string",
			pipelineContinuous: true,
			pipelineNumWorkers: 42,
		},
		{
			targetName:         "override-string-variable",
			pipelineName:       "overridden_string",
			pipelineContinuous: true,
			pipelineNumWorkers: 42,
		},
		{
			targetName:         "override-int-variable",
			pipelineName:       "a_string",
			pipelineContinuous: true,
			pipelineNumWorkers: 43,
		},
		{
			targetName:         "override-both-bool-and-string-variables",
			pipelineName:       "overridden_string",
			pipelineContinuous: false,
			pipelineNumWorkers: 42,
		},
	}

	for _, want := range cases {
		t.Run(want.targetName, func(t *testing.T) {
			b := loadTarget(t, "./variables/variable_overrides_in_target", want.targetName)

			resolve := bundle.Seq(
				mutator.SetVariables(),
				mutator.ResolveVariableReferences("variables"),
			)
			err := bundle.Apply(context.Background(), b, resolve).Error()
			require.NoError(t, err)

			pipelines := b.Config.Resources.Pipelines
			assert.Equal(t, want.pipelineName, pipelines["my_pipeline"].Name)
			assert.Equal(t, want.pipelineContinuous, pipelines["my_pipeline"].Continuous)
			assert.Equal(t, want.pipelineNumWorkers, pipelines["my_pipeline"].Clusters[0].NumWorkers)
		})
	}
}
// TestBundleWithEmptyVariableLoads loads ./variables/empty and expects
// SetVariables to fail because variable a has no value assigned.
func TestBundleWithEmptyVariableLoads(t *testing.T) {
	b := load(t, "./variables/empty")

	err := bundle.Apply(context.Background(), b, mutator.SetVariables()).Error()

	require.ErrorContains(t, err, "no value assigned to required variable a")
}

6
libs/env/context.go vendored
View File

@ -65,7 +65,7 @@ func Set(ctx context.Context, key, value string) context.Context {
return setMap(ctx, m)
}
func homeEnvVar() string {
func HomeEnvVar() string {
if runtime.GOOS == "windows" {
return "USERPROFILE"
}
@ -73,14 +73,14 @@ func homeEnvVar() string {
}
func WithUserHomeDir(ctx context.Context, value string) context.Context {
return Set(ctx, homeEnvVar(), value)
return Set(ctx, HomeEnvVar(), value)
}
// ErrNoHomeEnv indicates the absence of $HOME env variable
var ErrNoHomeEnv = errors.New("$HOME is not set")
func UserHomeDir(ctx context.Context) (string, error) {
home := Get(ctx, homeEnvVar())
home := Get(ctx, HomeEnvVar())
if home == "" {
return "", ErrNoHomeEnv
}

View File

@ -25,9 +25,11 @@ func AssertEqualTexts(t testutil.TestingT, filename1, filename2, expected, out s
} else {
// only show diff for large texts
diff := UnifiedDiff(filename1, filename2, expected, out)
if diff != "" {
t.Errorf("Diff:\n" + diff)
}
}
}
func AssertEqualJQ(t testutil.TestingT, expectedName, outName, expected, out string, ignorePaths []string) {
t.Helper()