From 6771ba09a699b3890316cf8f849b3a51733750e4 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 20 Aug 2024 11:33:03 +0200 Subject: [PATCH 01/37] Correctly mark package names with versions as remote libraries (#1697) ## Changes Fixes https://github.com/databricks/setup-cli/issues/124 ## Tests Added regression test --- bundle/libraries/local_path.go | 5 +++++ bundle/libraries/local_path_test.go | 1 + 2 files changed, 6 insertions(+) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 5b5ec6c0..3e32adfd 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -66,6 +66,11 @@ func IsLibraryLocal(dep string) bool { } func isPackage(name string) bool { + // If the dependency has ==, it's a package with version + if strings.Contains(name, "==") { + return true + } + // If the dependency has no extension, it's a PyPi package name return path.Ext(name) == "" } diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index be4028d5..7299cdc9 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,6 +54,7 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4==4.12.3", expected: false}, } for i, tc := range testCases { From af5048e73efab56dd2a13a02132e78d3ee84c5e7 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 20 Aug 2024 14:54:56 +0200 Subject: [PATCH 02/37] Share test initializer in common helper function (#1695) ## Changes These tests inadvertently re-ran mutators, the first time through `loadTarget` and the second time by running `phases.Initialize()` themselves. Some of the mutators that are executed in `phases.Initialize()` are also run as part of `loadTarget`. This is overdue a refactor to make it unambiguous what runs when. Until then, this removes the duplicated execution. ## Tests Unit tests pass. 
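For illustration, a test that previously constructed its own mock workspace
client and ran `phases.Initialize()` by hand now reduces to a single call.
A minimal sketch (the test name is a placeholder; the helper is the
`initializeTarget` function added in this change):

```go
func TestExample(t *testing.T) {
	// Load the target and run the initialize phase exactly once.
	b, diags := initializeTarget(t, "./pipeline_glob_paths", "default")
	require.NoError(t, diags.Error())

	// Assertions run against the initialized bundle configuration.
	_ = b
}
```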
--- bundle/tests/loader.go | 29 +++++++++++++++ bundle/tests/pipeline_glob_paths_test.go | 37 +------------------ .../tests/relative_path_translation_test.go | 29 +-------------- 3 files changed, 33 insertions(+), 62 deletions(-) diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 069f0935..848132a1 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -8,6 +8,10 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -43,3 +47,28 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { )) return b, diags } + +func configureMock(t *testing.T, b *bundle.Bundle) { + // Configure mock workspace client + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &config.Config{ + Host: "https://mock.databricks.workspace.com", + } + m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ + UserName: "user@domain.com", + }, nil) + b.SetWorkpaceClient(m.WorkspaceClient) +} + +func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diagnostics) { + b := load(t, path) + configureMock(t, b) + + ctx := context.Background() + diags := bundle.Apply(ctx, b, bundle.Seq( + mutator.SelectTarget(env), + phases.Initialize(), + )) + + return b, diags +} diff --git a/bundle/tests/pipeline_glob_paths_test.go b/bundle/tests/pipeline_glob_paths_test.go index bf5039b5..c1c62cfb 100644 --- a/bundle/tests/pipeline_glob_paths_test.go +++ b/bundle/tests/pipeline_glob_paths_test.go @@ -1,33 +1,13 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) func TestExpandPipelineGlobPaths(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "default") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + b, diags := initializeTarget(t, "./pipeline_glob_paths", "default") require.NoError(t, diags.Error()) require.Equal( t, @@ -37,19 +17,6 @@ func TestExpandPipelineGlobPaths(t *testing.T) { } func TestExpandPipelineGlobPathsWithNonExistent(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "error") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + _, diags := initializeTarget(t, "./pipeline_glob_paths", "error") require.ErrorContains(t, 
diags.Error(), "notebook ./non-existent not found") } diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go index d5b80bea..199871d2 100644 --- a/bundle/tests/relative_path_translation_test.go +++ b/bundle/tests/relative_path_translation_test.go @@ -1,36 +1,14 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func configureMock(t *testing.T, b *bundle.Bundle) { - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) -} - func TestRelativePathTranslationDefault(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "default") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "default") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] @@ -40,10 +18,7 @@ func TestRelativePathTranslationDefault(t *testing.T) { } func TestRelativePathTranslationOverride(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "override") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "override") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] From 44902fa3501033928a5ec46dbfcf4cb23f739788 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 20 Aug 2024 15:26:57 +0200 Subject: [PATCH 03/37] Make `pydabs/venv_path` optional (#1687) ## Changes Make `pydabs/venv_path` optional. When not specified, CLI detects the Python interpreter using `python.DetectExecutable`, the same way as for `artifacts`. `python.DetectExecutable` works correctly if a virtual environment is activated or `python3` is available on PATH through other means. Extract the venv detection code from PyDABs into `libs/python/detect`. This code will be used when we implement the `python/venv_path` section in `databricks.yml`. 
## Tests Unit tests and manually --------- Co-authored-by: Pieter Noordhuis --- bundle/artifacts/whl/infer.go | 2 + bundle/config/experimental.go | 4 +- .../config/mutator/python/python_mutator.go | 33 ++++++------- .../mutator/python/python_mutator_test.go | 21 +++++++-- libs/python/detect.go | 46 +++++++++++++++++++ libs/python/detect_test.go | 46 +++++++++++++++++++ 6 files changed, 128 insertions(+), 24 deletions(-) create mode 100644 libs/python/detect_test.go diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go index dd4ad295..cb727de0 100644 --- a/bundle/artifacts/whl/infer.go +++ b/bundle/artifacts/whl/infer.go @@ -15,6 +15,8 @@ type infer struct { func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { artifact := b.Config.Artifacts[m.name] + + // TODO use python.DetectVEnvExecutable once bundle has a way to specify venv path py, err := python.DetectExecutable(ctx) if err != nil { return diag.FromErr(err) diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 66e97582..061bbdae 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -36,8 +36,8 @@ type PyDABs struct { // VEnvPath is path to the virtual environment. // - // Required if PyDABs is enabled. PyDABs will load the code in the specified - // environment. + // If enabled, PyDABs will execute code within this environment. If disabled, + // it defaults to using the Python interpreter available in the current shell. VEnvPath string `json:"venv_path,omitempty"` // Import contains a list Python packages with PyDABs code. diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index f9febe5b..4f44df0a 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,8 +7,8 @@ import ( "fmt" "os" "path/filepath" - "runtime" + "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go/logger" "github.com/databricks/cli/bundle/env" @@ -86,23 +86,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return nil } - if experimental.PyDABs.VEnvPath == "" { - return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set") - } - // mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics' var mutateDiags diag.Diagnostics var mutateDiagsHasError = errors.New("unexpected error") err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { - pythonPath := interpreterPath(experimental.PyDABs.VEnvPath) + pythonPath, err := detectExecutable(ctx, experimental.PyDABs.VEnvPath) - if _, err := os.Stat(pythonPath); err != nil { - if os.IsNotExist(err) { - return dyn.InvalidValue, fmt.Errorf("can't find %q, check if venv is created", pythonPath) - } else { - return dyn.InvalidValue, fmt.Errorf("can't find %q: %w", pythonPath, err) - } + if err != nil { + return dyn.InvalidValue, fmt.Errorf("failed to get Python interpreter path: %w", err) } cacheDir, err := createCacheDir(ctx) @@ -423,11 +415,16 @@ func isOmitemptyDelete(left dyn.Value) bool { } } -// interpreterPath returns platform-specific path to Python interpreter in the virtual environment. 
-func interpreterPath(venvPath string) string {
-	if runtime.GOOS == "windows" {
-		return filepath.Join(venvPath, "Scripts", "python3.exe")
-	} else {
-		return filepath.Join(venvPath, "bin", "python3")
+// detectExecutable looks up the Python interpreter in the virtual environment, or, if not set, on the PATH.
+func detectExecutable(ctx context.Context, venvPath string) (string, error) {
+	if venvPath == "" {
+		interpreter, err := python.DetectExecutable(ctx)
+		if err != nil {
+			return "", err
+		}
+
+		return interpreter, nil
 	}
+
+	return python.DetectVEnvExecutable(venvPath)
 }
diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go
index fbe835f9..ea02d1ce 100644
--- a/bundle/config/mutator/python/python_mutator_test.go
+++ b/bundle/config/mutator/python/python_mutator_test.go
@@ -282,7 +282,7 @@ func TestPythonMutator_venvRequired(t *testing.T) {
 }
 
 func TestPythonMutator_venvNotFound(t *testing.T) {
-	expectedError := fmt.Sprintf("can't find %q, check if venv is created", interpreterPath("bad_path"))
+	expectedError := fmt.Sprintf("failed to get Python interpreter path: can't find %q, check if virtualenv is created", interpreterPath("bad_path"))
 
 	b := loadYaml("databricks.yml", `
       experimental:
@@ -596,9 +596,7 @@ func loadYaml(name string, content string) *bundle.Bundle {
 	}
 }
 
-func withFakeVEnv(t *testing.T, path string) {
-	interpreterPath := interpreterPath(path)
-
+func withFakeVEnv(t *testing.T, venvPath string) {
 	cwd, err := os.Getwd()
 	if err != nil {
 		panic(err)
@@ -608,6 +606,8 @@ func withFakeVEnv(t *testing.T, path string) {
 		panic(err)
 	}
 
+	interpreterPath := interpreterPath(venvPath)
+
 	err = os.MkdirAll(filepath.Dir(interpreterPath), 0755)
 	if err != nil {
 		panic(err)
@@ -618,9 +618,22 @@ func withFakeVEnv(t *testing.T, path string) {
 		panic(err)
 	}
 
+	err = os.WriteFile(filepath.Join(venvPath, "pyvenv.cfg"), []byte(""), 0755)
+	if err != nil {
+		panic(err)
+	}
+
 	t.Cleanup(func() {
 		if err := os.Chdir(cwd); err != nil {
 			panic(err)
 		}
 	})
 }
+
+func interpreterPath(venvPath string) string {
+	if runtime.GOOS == "windows" {
+		return filepath.Join(venvPath, "Scripts", "python3.exe")
+	} else {
+		return filepath.Join(venvPath, "bin", "python3")
+	}
+}
diff --git a/libs/python/detect.go b/libs/python/detect.go
index b0c1475c..8fcc7cd9 100644
--- a/libs/python/detect.go
+++ b/libs/python/detect.go
@@ -3,9 +3,23 @@ package python
 import (
 	"context"
 	"errors"
+	"fmt"
+	"io/fs"
+	"os"
 	"os/exec"
+	"path/filepath"
+	"runtime"
 )
 
+// DetectExecutable looks up the path to the python3 executable from the PATH
+// environment variable.
+//
+// If a virtualenv is activated, its executable is returned, because
+// activating a virtualenv puts its python3 executable on the PATH.
+//
+// If no python3 executable is found on the PATH, the interpreter with the
+// lowest version that satisfies the minimum of 3.8 is returned, e.g.
+// python3.10.
 func DetectExecutable(ctx context.Context) (string, error) {
 	// TODO: add a shortcut if .python-version file is detected somewhere in
 	// the parent directory tree.
@@ -32,3 +46,35 @@ func DetectExecutable(ctx context.Context) (string, error) {
 	}
 	return interpreter.Path, nil
 }
+
+// DetectVEnvExecutable returns the path to the python3 executable inside venvPath,
+// which does not need to be activated.
+//
+// If the virtualenv is not created, or the executable doesn't exist, an error is returned.
+func DetectVEnvExecutable(venvPath string) (string, error) {
+	interpreterPath := filepath.Join(venvPath, "bin", "python3")
+	if runtime.GOOS == "windows" {
+		interpreterPath = filepath.Join(venvPath, "Scripts", "python3.exe")
+	}
+
+	if _, err := os.Stat(interpreterPath); err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			return "", fmt.Errorf("can't find %q, check if virtualenv is created", interpreterPath)
+		} else {
+			return "", fmt.Errorf("can't find %q: %w", interpreterPath, err)
+		}
+	}
+
+	// pyvenv.cfg must always be present in a correctly configured virtualenv,
+	// read more in https://snarky.ca/how-virtual-environments-work/
+	pyvenvPath := filepath.Join(venvPath, "pyvenv.cfg")
+	if _, err := os.Stat(pyvenvPath); err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			return "", fmt.Errorf("expected %q to be virtualenv, but pyvenv.cfg is missing", venvPath)
+		} else {
+			return "", fmt.Errorf("can't find %q: %w", pyvenvPath, err)
+		}
+	}
+
+	return interpreterPath, nil
+}
diff --git a/libs/python/detect_test.go b/libs/python/detect_test.go
new file mode 100644
index 00000000..78c7067f
--- /dev/null
+++ b/libs/python/detect_test.go
@@ -0,0 +1,46 @@
+package python
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestDetectVEnvExecutable(t *testing.T) {
+	dir := t.TempDir()
+	interpreterPath := interpreterPath(dir)
+
+	err := os.Mkdir(filepath.Dir(interpreterPath), 0755)
+	require.NoError(t, err)
+
+	err = os.WriteFile(interpreterPath, []byte(""), 0755)
+	require.NoError(t, err)
+
+	err = os.WriteFile(filepath.Join(dir, "pyvenv.cfg"), []byte(""), 0755)
+	require.NoError(t, err)
+
+	executable, err := DetectVEnvExecutable(dir)
+
+	assert.NoError(t, err)
+	assert.Equal(t, interpreterPath, executable)
+}
+
+func TestDetectVEnvExecutable_badLayout(t *testing.T) {
+	dir := t.TempDir()
+
+	_, err := DetectVEnvExecutable(dir)
+
+	assert.Errorf(t, err, "can't find %q, check if virtualenv is created", interpreterPath(dir))
+}
+
+func interpreterPath(venvPath string) string {
+	if runtime.GOOS == "windows" {
+		return filepath.Join(venvPath, "Scripts", "python3.exe")
+	} else {
+		return filepath.Join(venvPath, "bin", "python3")
+	}
+}
From a4c1ba3e2827abca29034115436d441310b7ee33 Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Wed, 21 Aug 2024 13:15:25 +0530
Subject: [PATCH 04/37] Use API mocks for duplicate path errors in workspace files extensions client (#1690)

## Changes
`TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName` recently started failing in our nightlies because the upstream `import` API was changed to [prohibit conflicting file paths](https://docs.databricks.com/en/release-notes/product/2024/august.html#files-can-no-longer-have-identical-names-in-workspace-folders). Because existing conflicting file paths can still be grandfathered in, we need to retain coverage for the test. To do this, this PR:
1. Removes the failing `TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName`
2. Adds an equivalent unit test with the `list` and `get-status` API calls mocked (see the sketch below).
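For context, the collision the test exercises arises because the extensions
client presents notebooks with an extension appended. An illustrative listing
(paths taken from the test cases below):

```
/dir/foo     NOTEBOOK (PYTHON, SOURCE)  -> presented as foo.py
/dir/foo.py  FILE                       -> presented as foo.py  (duplicate)
```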
--- internal/filer_test.go | 62 ------- libs/filer/workspace_files_client.go | 26 +-- .../workspace_files_extensions_client.go | 8 +- .../workspace_files_extensions_client_test.go | 151 ++++++++++++++++++ 4 files changed, 172 insertions(+), 75 deletions(-) create mode 100644 libs/filer/workspace_files_extensions_client_test.go diff --git a/internal/filer_test.go b/internal/filer_test.go index 27530425..bc4c9480 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -5,7 +5,6 @@ import ( "context" "encoding/json" "errors" - "fmt" "io" "io/fs" "path" @@ -722,67 +721,6 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { assert.ErrorIs(t, err, fs.ErrNotExist) } -func TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { - t.Parallel() - - tcases := []struct { - files []struct{ name, content string } - name string - }{ - { - name: "python", - files: []struct{ name, content string }{ - {"foo.py", "print('foo')"}, - {"foo.py", "# Databricks notebook source\nprint('foo')"}, - }, - }, - { - name: "r", - files: []struct{ name, content string }{ - {"foo.r", "print('foo')"}, - {"foo.r", "# Databricks notebook source\nprint('foo')"}, - }, - }, - { - name: "sql", - files: []struct{ name, content string }{ - {"foo.sql", "SELECT 'foo'"}, - {"foo.sql", "-- Databricks notebook source\nSELECT 'foo'"}, - }, - }, - { - name: "scala", - files: []struct{ name, content string }{ - {"foo.scala", "println('foo')"}, - {"foo.scala", "// Databricks notebook source\nprintln('foo')"}, - }, - }, - // We don't need to test this for ipynb notebooks. The import API - // fails when the file extension is .ipynb but the content is not a - // valid juptyer notebook. - } - - for i := range tcases { - tc := tcases[i] - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - ctx := context.Background() - wf, tmpDir := setupWsfsExtensionsFiler(t) - - for _, f := range tc.files { - err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories) - require.NoError(t, err) - } - - _, err := wf.ReadDir(ctx, ".") - assert.ErrorAs(t, err, &filer.DuplicatePathError{}) - assert.ErrorContains(t, err, fmt.Sprintf("failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at %s and FILE at %s resolve to the same name %s. Changing the name of one of these objects will resolve this issue", path.Join(tmpDir, "foo"), path.Join(tmpDir, tc.files[0].name), tc.files[0].name)) - }) - } - -} - func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { t.Parallel() diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index e911f440..d8ab5a6b 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -102,13 +102,21 @@ func (info *wsfsFileInfo) MarshalJSON() ([]byte, error) { return marshal.Marshal(info) } +// Interface for *client.DatabricksClient from the Databricks Go SDK. Abstracted +// as an interface to allow for mocking in tests. +type apiClient interface { + Do(ctx context.Context, method, path string, + headers map[string]string, request, response any, + visitors ...func(*http.Request) error) error +} + // WorkspaceFilesClient implements the files-in-workspace API. // NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled. // It can access any workspace path if files-in-workspace is enabled. 
-type WorkspaceFilesClient struct { +type workspaceFilesClient struct { workspaceClient *databricks.WorkspaceClient - apiClient *client.DatabricksClient + apiClient apiClient // File operations will be relative to this path. root WorkspaceRootPath @@ -120,7 +128,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, return nil, err } - return &WorkspaceFilesClient{ + return &workspaceFilesClient{ workspaceClient: w, apiClient: apiClient, @@ -128,7 +136,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, }, nil } -func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { +func (w *workspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { absPath, err := w.root.Join(name) if err != nil { return err @@ -198,7 +206,7 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io return err } -func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { +func (w *workspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -222,7 +230,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCl return w.workspaceClient.Workspace.Download(ctx, absPath) } -func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { +func (w *workspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { absPath, err := w.root.Join(name) if err != nil { return err @@ -266,7 +274,7 @@ func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ... return err } -func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { +func (w *workspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -299,7 +307,7 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.D return wsfsDirEntriesFromObjectInfos(objects), nil } -func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { +func (w *workspaceFilesClient) Mkdir(ctx context.Context, name string) error { dirPath, err := w.root.Join(name) if err != nil { return err @@ -309,7 +317,7 @@ func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { }) } -func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { +func (w *workspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 844e736b..b24ecf7e 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -133,14 +133,14 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con }, nil } -type DuplicatePathError struct { +type duplicatePathError struct { oi1 workspace.ObjectInfo oi2 workspace.ObjectInfo commonName string } -func (e DuplicatePathError) Error() string { +func (e duplicatePathError) Error() string { return fmt.Sprintf("failed to read files from the workspace file system. Duplicate paths encountered. Both %s at %s and %s at %s resolve to the same name %s. 
Changing the name of one of these objects will resolve this issue", e.oi1.ObjectType, e.oi1.Path, e.oi2.ObjectType, e.oi2.Path, e.commonName) } @@ -157,7 +157,7 @@ func (e ReadOnlyError) Error() string { // delete, and stat notebooks (and files in general) in the workspace, using their paths // with the extension included. // -// The ReadDir method returns a DuplicatePathError if this traditional file system view is +// The ReadDir method returns a duplicatePathError if this traditional file system view is // not possible. For example, a Python notebook called foo and a Python file called `foo.py` // would resolve to the same path `foo.py` in a tradition file system. // @@ -220,7 +220,7 @@ func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name strin // Error if we have seen this path before in the current directory. // If not seen before, add it to the seen paths. if _, ok := seenPaths[entries[i].Name()]; ok { - return nil, DuplicatePathError{ + return nil, duplicatePathError{ oi1: seenPaths[entries[i].Name()], oi2: sysInfo, commonName: path.Join(name, entries[i].Name()), diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go new file mode 100644 index 00000000..321c4371 --- /dev/null +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -0,0 +1,151 @@ +package filer + +import ( + "context" + "net/http" + "testing" + + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +// Mocks client.DatabricksClient from the databricks-sdk-go package. +type mockApiClient struct { + mock.Mock +} + +func (m *mockApiClient) Do(ctx context.Context, method, path string, + headers map[string]string, request any, response any, + visitors ...func(*http.Request) error) error { + args := m.Called(ctx, method, path, headers, request, response, visitors) + + // Set the http response from a value provided in the mock call. + p := response.(*wsfsFileInfo) + *p = args.Get(1).(wsfsFileInfo) + return args.Error(0) +} + +func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { + for _, tc := range []struct { + name string + language workspace.Language + notebookExportFormat workspace.ExportFormat + notebookPath string + filePath string + expectedError string + }{ + { + name: "python source notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "scala source notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. 
Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r source notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.r", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql source notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + } { + t.Run(tc.name, func(t *testing.T) { + mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t) + mockedApiClient := mockApiClient{} + + // Mock the workspace API's ListAll method. + workspaceApi := mockedWorkspaceClient.GetMockWorkspaceAPI() + workspaceApi.EXPECT().ListAll(mock.Anything, workspace.ListWorkspaceRequest{ + Path: "/dir", + }).Return([]workspace.ObjectInfo{ + { + Path: tc.filePath, + Language: tc.language, + ObjectType: workspace.ObjectTypeFile, + }, + { + Path: tc.notebookPath, + Language: tc.language, + ObjectType: workspace.ObjectTypeNotebook, + }, + }, nil) + + // Mock bespoke API calls to /api/2.0/workspace/get-status, that are + // used to figure out the right file extension for the notebook. + statNotebook := wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: tc.notebookPath, + Language: tc.language, + ObjectType: workspace.ObjectTypeNotebook, + }, + ReposExportFormat: tc.notebookExportFormat, + } + + mockedApiClient.On("Do", mock.Anything, http.MethodGet, "/api/2.0/workspace/get-status", map[string]string(nil), map[string]string{ + "path": tc.notebookPath, + "return_export_info": "true", + }, mock.AnythingOfType("*filer.wsfsFileInfo"), []func(*http.Request) error(nil)).Return(nil, statNotebook) + + workspaceFilesClient := workspaceFilesClient{ + workspaceClient: mockedWorkspaceClient.WorkspaceClient, + apiClient: &mockedApiClient, + root: NewWorkspaceRootPath("/dir"), + } + + workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{ + workspaceClient: mockedWorkspaceClient.WorkspaceClient, + wsfs: &workspaceFilesClient, + } + + _, err := workspaceFilesExtensionsClient.ReadDir(context.Background(), "/") + + if tc.expectedError == "" { + assert.NoError(t, err) + } else { + assert.ErrorAs(t, err, &duplicatePathError{}) + assert.EqualError(t, err, tc.expectedError) + } + + // assert the mocked methods were actually called, as a sanity check. 
+ workspaceApi.AssertNumberOfCalls(t, "ListAll", 1) + mockedApiClient.AssertNumberOfCalls(t, "Do", 1) + }) + } +} From c775d251eda6fa567de95e55e4558d5b99abce39 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 10:22:35 +0200 Subject: [PATCH 05/37] Improves detection of PyPI package names in environment dependencies (#1699) ## Changes Improves detection of PyPi package names in environment dependencies ## Tests Added unit tests --- bundle/libraries/local_path.go | 22 ++++++++++++++++++---- bundle/libraries/local_path_test.go | 9 +++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 3e32adfd..417bce10 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -3,6 +3,7 @@ package libraries import ( "net/url" "path" + "regexp" "strings" ) @@ -65,14 +66,27 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } +// ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). +// \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) + func isPackage(name string) bool { - // If the dependency has ==, it's a package with version - if strings.Contains(name, "==") { + if packageRegex.MatchString(name) { return true } - // If the dependency has no extension, it's a PyPi package name - return path.Ext(name) == "" + return isUrlBasedLookup(name) +} + +func isUrlBasedLookup(name string) bool { + parts := strings.Split(name, " @ ") + if len(parts) != 2 { + return false + } + + return packageRegex.MatchString(parts[0]) && isRemoteStorageScheme(parts[1]) } func isRemoteStorageScheme(path string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 7299cdc9..7f84b324 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,7 +54,16 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4", expected: false}, {path: "beautifulsoup4==4.12.3", expected: false}, + {path: "beautifulsoup4 >= 4.12.3", expected: false}, + {path: "beautifulsoup4 < 4.12.3", expected: false}, + {path: "beautifulsoup4 ~= 4.12.3", expected: false}, + {path: "beautifulsoup4[security, tests]", expected: false}, + {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, } for i, tc := range testCases { From 192f33bb13a156bebf9d7d2c2b06092d8ae9775d Mon Sep 17 00:00:00 2001 From: Witold Czaplewski Date: Wed, 21 Aug 2024 12:03:56 +0200 Subject: [PATCH 06/37] [DAB] Add support for requirements libraries in Job Tasks (#1543) ## Changes While experimenting 
with DAB I discovered that requirements libraries are being ignored. One thing worth mentioning is that `bundle validate` runs successfully, but `bundle deploy` fails. This PR only covers the second part. ## Tests Added a unit test --- bundle/config/mutator/translate_paths_jobs.go | 5 +++++ bundle/config/mutator/translate_paths_test.go | 9 +++++++++ bundle/libraries/helpers.go | 3 +++ bundle/libraries/helpers_test.go | 1 + 4 files changed, 18 insertions(+) diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index 6febf4f8..e34eeb2f 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -50,6 +50,11 @@ func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern t.translateNoOp, noSkipRewrite, }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), + t.translateFilePath, + noSkipRewrite, + }, } } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 780a540d..fd64593b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -110,6 +110,7 @@ func TestTranslatePaths(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py")) touchEmptyFile(t, filepath.Join(dir, "my_python_file.py")) touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar")) + touchEmptyFile(t, filepath.Join(dir, "requirements.txt")) b := &bundle.Bundle{ RootPath: dir, @@ -140,6 +141,9 @@ func TestTranslatePaths(t *testing.T) { NotebookTask: &jobs.NotebookTask{ NotebookPath: "./my_job_notebook.py", }, + Libraries: []compute.Library{ + {Requirements: "./requirements.txt"}, + }, }, { PythonWheelTask: &jobs.PythonWheelTask{ @@ -232,6 +236,11 @@ func TestTranslatePaths(t *testing.T) { "/bundle/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[2].NotebookTask.NotebookPath, ) + assert.Equal( + t, + "/bundle/requirements.txt", + b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, + ) assert.Equal( t, "/bundle/my_python_file.py", diff --git a/bundle/libraries/helpers.go b/bundle/libraries/helpers.go index 89679c91..b7e707cc 100644 --- a/bundle/libraries/helpers.go +++ b/bundle/libraries/helpers.go @@ -12,5 +12,8 @@ func libraryPath(library *compute.Library) string { if library.Egg != "" { return library.Egg } + if library.Requirements != "" { + return library.Requirements + } return "" } diff --git a/bundle/libraries/helpers_test.go b/bundle/libraries/helpers_test.go index adc20a24..e4bd3277 100644 --- a/bundle/libraries/helpers_test.go +++ b/bundle/libraries/helpers_test.go @@ -13,5 +13,6 @@ func TestLibraryPath(t *testing.T) { assert.Equal(t, path, libraryPath(&compute.Library{Whl: path})) assert.Equal(t, path, libraryPath(&compute.Library{Jar: path})) assert.Equal(t, path, libraryPath(&compute.Library{Egg: path})) + assert.Equal(t, path, libraryPath(&compute.Library{Requirements: path})) assert.Equal(t, "", libraryPath(&compute.Library{})) } From f5df211320a5fad876c58737d959a0a034040c63 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:23:54 +0530 Subject: [PATCH 07/37] Fix prefix preset used for UC schemas (#1704) ## Changes In https://github.com/databricks/cli/pull/1490 we regressed and started using the development mode prefix for UC schemas regardless of the mode of the bundle target. 
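To illustrate (reconstructed from the removed code and the new test cases;
names are illustrative):

```
presets:
  name_prefix: "[prefix]"

# schema "schema1"
#   before: dev_<short_name>_schema1   # hardcoded dev prefix, in any mode
#   after:  prefix_schema1             # normalized preset prefix
```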
This PR fixes the regression and adds a regression test ## Tests Failing integration tests pass now. --- bundle/config/mutator/apply_presets.go | 3 +- bundle/config/mutator/apply_presets_test.go | 57 +++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 42e6ab95..28d015c1 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -155,8 +155,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // Schemas: Prefix for i := range r.Schemas { - prefix = "dev_" + b.Config.Workspace.CurrentUser.ShortName + "_" - r.Schemas[i].Name = prefix + r.Schemas[i].Name + r.Schemas[i].Name = normalizePrefix(prefix) + r.Schemas[i].Name // HTTP API for schemas doesn't yet support tags. It's only supported in // the Databricks UI and via the SQL API. } diff --git a/bundle/config/mutator/apply_presets_test.go b/bundle/config/mutator/apply_presets_test.go index 35dac1f7..ab2478ae 100644 --- a/bundle/config/mutator/apply_presets_test.go +++ b/bundle/config/mutator/apply_presets_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" ) @@ -68,6 +69,62 @@ func TestApplyPresetsPrefix(t *testing.T) { } } +func TestApplyPresetsPrefixForUcSchema(t *testing.T) { + tests := []struct { + name string + prefix string + schema *resources.Schema + want string + }{ + { + name: "add prefix to schema", + prefix: "[prefix]", + schema: &resources.Schema{ + CreateSchema: &catalog.CreateSchema{ + Name: "schema1", + }, + }, + want: "prefix_schema1", + }, + { + name: "add empty prefix to schema", + prefix: "", + schema: &resources.Schema{ + CreateSchema: &catalog.CreateSchema{ + Name: "schema1", + }, + }, + want: "schema1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": tt.schema, + }, + }, + Presets: config.Presets{ + NamePrefix: tt.prefix, + }, + }, + } + + ctx := context.Background() + diag := bundle.Apply(ctx, b, mutator.ApplyPresets()) + + if diag.HasError() { + t.Fatalf("unexpected error: %v", diag) + } + + require.Equal(t, tt.want, b.Config.Resources.Schemas["schema1"].Name) + }) + } +} + func TestApplyPresetsTags(t *testing.T) { tests := []struct { name string From 6f345293b1e5f4febcd702da8a362b15b606ebd9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 17:05:49 +0200 Subject: [PATCH 08/37] Added filtering flags for cluster list commands (#1703) ## Changes Fixes #1701 ## Tests ``` Usage: databricks clusters list [flags] Flags: --cluster-sources []string Filter clusters by source --cluster-states []string Filter clusters by states -h, --help help for list --is-pinned Filter clusters by pinned status --page-size int Use this field to specify the maximum number of results to be returned by the server. --page-token string Use next_page_token or prev_page_token returned from the previous request to list the next or previous page of clusters respectively. 
--policy-id string Filter clusters by policy id ``` --- cmd/workspace/clusters/overrides.go | 68 ++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/cmd/workspace/clusters/overrides.go b/cmd/workspace/clusters/overrides.go index 55976d40..6038978a 100644 --- a/cmd/workspace/clusters/overrides.go +++ b/cmd/workspace/clusters/overrides.go @@ -1,17 +1,83 @@ package clusters import ( + "strings" + "github.com/databricks/cli/libs/cmdio" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/spf13/cobra" ) -func listOverride(listCmd *cobra.Command, _ *compute.ListClustersRequest) { +// Below we add overrides for filter flags for cluster list command to allow for custom filtering +// Auto generating such flags is not yet supported by the CLI generator +func listOverride(listCmd *cobra.Command, listReq *compute.ListClustersRequest) { listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` {{header "ID"}} {{header "Name"}} {{header "State"}}`) listCmd.Annotations["template"] = cmdio.Heredoc(` {{range .}}{{.ClusterId | green}} {{.ClusterName | cyan}} {{if eq .State "RUNNING"}}{{green "%s" .State}}{{else if eq .State "TERMINATED"}}{{red "%s" .State}}{{else}}{{blue "%s" .State}}{{end}} {{end}}`) + + listReq.FilterBy = &compute.ListClustersFilterBy{} + listCmd.Flags().BoolVar(&listReq.FilterBy.IsPinned, "is-pinned", false, "Filter clusters by pinned status") + listCmd.Flags().StringVar(&listReq.FilterBy.PolicyId, "policy-id", "", "Filter clusters by policy id") + + sources := &clusterSources{source: &listReq.FilterBy.ClusterSources} + listCmd.Flags().Var(sources, "cluster-sources", "Filter clusters by source") + + states := &clusterStates{state: &listReq.FilterBy.ClusterStates} + listCmd.Flags().Var(states, "cluster-states", "Filter clusters by states") +} + +type clusterSources struct { + source *[]compute.ClusterSource +} + +func (c *clusterSources) String() string { + s := make([]string, len(*c.source)) + for i, source := range *c.source { + s[i] = string(source) + } + + return strings.Join(s, ",") +} + +func (c *clusterSources) Set(value string) error { + splits := strings.Split(value, ",") + for _, split := range splits { + *c.source = append(*c.source, compute.ClusterSource(split)) + } + + return nil +} + +func (c *clusterSources) Type() string { + return "[]string" +} + +type clusterStates struct { + state *[]compute.State +} + +func (c *clusterStates) String() string { + s := make([]string, len(*c.state)) + for i, source := range *c.state { + s[i] = string(source) + } + + return strings.Join(s, ",") +} + +func (c *clusterStates) Set(value string) error { + splits := strings.Split(value, ",") + for _, split := range splits { + *c.state = append(*c.state, compute.State(split)) + } + + return nil +} + +func (c *clusterStates) Type() string { + return "[]string" } func listNodeTypesOverride(listNodeTypesCmd *cobra.Command) { From 6e8cd835a3f699ffec0c04e9301e3a49fd61fc9c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 17:33:25 +0200 Subject: [PATCH 09/37] Add paths field to bundle sync configuration (#1694) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes This field allows a user to configure paths to synchronize to the workspace. Allowed values are relative paths to files and directories anchored at the directory where the field is set. 
If one or more values traverse up the directory tree (to an ancestor of the bundle root directory), the CLI will dynamically determine the root path to use to ensure that the file tree structure remains intact. For example, given a `databricks.yml` in `my_bundle` that includes: ```yaml sync: paths: - ../common - . ``` Then upon synchronization, the workspace will look like: ``` . ├── common │ └── lib.py └── my_bundle ├── databricks.yml └── notebook.py ``` If not set behavior remains identical. ## Tests * Newly added unit tests for the mutators and under `bundle/tests`. * Manually confirmed a bundle without this configuration works the same. * Manually confirmed a bundle with this configuration works. --- bundle/bundle.go | 8 + bundle/bundle_read_only.go | 4 + bundle/config/mutator/configure_wsfs.go | 4 +- bundle/config/mutator/rewrite_sync_paths.go | 4 + .../config/mutator/rewrite_sync_paths_test.go | 16 ++ bundle/config/mutator/sync_default_path.go | 48 +++++ .../config/mutator/sync_default_path_test.go | 82 ++++++++ bundle/config/mutator/sync_infer_root.go | 120 +++++++++++ .../mutator/sync_infer_root_internal_test.go | 72 +++++++ bundle/config/mutator/sync_infer_root_test.go | 198 ++++++++++++++++++ bundle/config/mutator/trampoline.go | 2 +- bundle/config/mutator/trampoline_test.go | 8 +- bundle/config/mutator/translate_paths.go | 12 +- bundle/config/mutator/translate_paths_test.go | 60 +++--- bundle/config/sync.go | 4 + bundle/deploy/files/sync.go | 4 +- bundle/deploy/state_pull.go | 2 +- bundle/deploy/state_pull_test.go | 8 +- bundle/phases/initialize.go | 11 + bundle/python/conditional_transform_test.go | 22 +- bundle/tests/loader.go | 2 + bundle/tests/sync/paths/databricks.yml | 20 ++ .../tests/sync/paths_no_root/databricks.yml | 26 +++ .../sync/shared_code/bundle/databricks.yml | 10 + .../tests/sync/shared_code/common/library.txt | 1 + bundle/tests/sync_test.go | 65 ++++++ cmd/sync/sync_test.go | 6 +- 27 files changed, 760 insertions(+), 59 deletions(-) create mode 100644 bundle/config/mutator/sync_default_path.go create mode 100644 bundle/config/mutator/sync_default_path_test.go create mode 100644 bundle/config/mutator/sync_infer_root.go create mode 100644 bundle/config/mutator/sync_infer_root_internal_test.go create mode 100644 bundle/config/mutator/sync_infer_root_test.go create mode 100644 bundle/tests/sync/paths/databricks.yml create mode 100644 bundle/tests/sync/paths_no_root/databricks.yml create mode 100644 bundle/tests/sync/shared_code/bundle/databricks.yml create mode 100644 bundle/tests/sync/shared_code/common/library.txt diff --git a/bundle/bundle.go b/bundle/bundle.go index 032d98ab..8b5ff976 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -39,6 +39,14 @@ type Bundle struct { // Exclusively use this field for filesystem operations. BundleRoot vfs.Path + // SyncRoot is a virtual filesystem path to the root directory of the files that are synchronized to the workspace. + // It can be an ancestor to [BundleRoot], but not a descendant; that is, [SyncRoot] must contain [BundleRoot]. + SyncRoot vfs.Path + + // SyncRootPath is the local path to the root directory of files that are synchronized to the workspace. + // It is equal to `SyncRoot.Native()` and included as dedicated field for convenient access. + SyncRootPath string + Config config.Root // Metadata about the bundle deployment. 
This is the interface Databricks services diff --git a/bundle/bundle_read_only.go b/bundle/bundle_read_only.go index 59084f2a..74b9d94d 100644 --- a/bundle/bundle_read_only.go +++ b/bundle/bundle_read_only.go @@ -28,6 +28,10 @@ func (r ReadOnlyBundle) BundleRoot() vfs.Path { return r.b.BundleRoot } +func (r ReadOnlyBundle) SyncRoot() vfs.Path { + return r.b.SyncRoot +} + func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient { return r.b.WorkspaceClient() } diff --git a/bundle/config/mutator/configure_wsfs.go b/bundle/config/mutator/configure_wsfs.go index c7b764f0..1d1bec58 100644 --- a/bundle/config/mutator/configure_wsfs.go +++ b/bundle/config/mutator/configure_wsfs.go @@ -24,7 +24,7 @@ func (m *configureWSFS) Name() string { } func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - root := b.BundleRoot.Native() + root := b.SyncRoot.Native() // The bundle root must be located in /Workspace/ if !strings.HasPrefix(root, "/Workspace/") { @@ -45,6 +45,6 @@ func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return diag.FromErr(err) } - b.BundleRoot = p + b.SyncRoot = p return nil } diff --git a/bundle/config/mutator/rewrite_sync_paths.go b/bundle/config/mutator/rewrite_sync_paths.go index cfdc55f3..888714ab 100644 --- a/bundle/config/mutator/rewrite_sync_paths.go +++ b/bundle/config/mutator/rewrite_sync_paths.go @@ -45,6 +45,10 @@ func (m *rewriteSyncPaths) makeRelativeTo(root string) dyn.MapFunc { func (m *rewriteSyncPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "sync", func(_ dyn.Path, v dyn.Value) (nv dyn.Value, err error) { + v, err = dyn.Map(v, "paths", dyn.Foreach(m.makeRelativeTo(b.RootPath))) + if err != nil { + return dyn.InvalidValue, err + } v, err = dyn.Map(v, "include", dyn.Foreach(m.makeRelativeTo(b.RootPath))) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/rewrite_sync_paths_test.go b/bundle/config/mutator/rewrite_sync_paths_test.go index 56ada19e..fa7f124b 100644 --- a/bundle/config/mutator/rewrite_sync_paths_test.go +++ b/bundle/config/mutator/rewrite_sync_paths_test.go @@ -17,6 +17,10 @@ func TestRewriteSyncPathsRelative(t *testing.T) { RootPath: ".", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ -29,6 +33,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "./file.yml") bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml") @@ -37,6 +43,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) @@ -48,6 +56,10 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { RootPath: "/tmp/dir", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ 
-60,6 +72,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml") bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml") @@ -68,6 +82,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) diff --git a/bundle/config/mutator/sync_default_path.go b/bundle/config/mutator/sync_default_path.go new file mode 100644 index 00000000..8e14ce20 --- /dev/null +++ b/bundle/config/mutator/sync_default_path.go @@ -0,0 +1,48 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type syncDefaultPath struct{} + +// SyncDefaultPath configures the default sync path to be equal to the bundle root. +func SyncDefaultPath() bundle.Mutator { + return &syncDefaultPath{} +} + +func (m *syncDefaultPath) Name() string { + return "SyncDefaultPath" +} + +func (m *syncDefaultPath) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + isset := false + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + pv, _ := dyn.Get(v, "sync.paths") + + // If the sync paths field is already set, do nothing. + // We know it is set if its value is either a nil or a sequence (empty or not). + switch pv.Kind() { + case dyn.KindNil, dyn.KindSequence: + isset = true + } + + return v, nil + }) + if err != nil { + return diag.FromErr(err) + } + + // If the sync paths field is already set, do nothing. + if isset { + return nil + } + + // Set the sync paths to the default value. 
+ b.Config.Sync.Paths = []string{"."} + return nil +} diff --git a/bundle/config/mutator/sync_default_path_test.go b/bundle/config/mutator/sync_default_path_test.go new file mode 100644 index 00000000..a37e913d --- /dev/null +++ b/bundle/config/mutator/sync_default_path_test.go @@ -0,0 +1,82 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncDefaultPath_DefaultIfUnset(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) +} + +func TestSyncDefaultPath_SkipIfSet(t *testing.T) { + tcases := []struct { + name string + paths dyn.Value + expect []string + }{ + { + name: "nil", + paths: dyn.V(nil), + expect: nil, + }, + { + name: "empty sequence", + paths: dyn.V([]dyn.Value{}), + expect: []string{}, + }, + { + name: "non-empty sequence", + paths: dyn.V([]dyn.Value{dyn.V("something")}), + expect: []string{"something"}, + }, + } + + for _, tcase := range tcases { + t.Run(tcase.name, func(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + diags := bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, err := dyn.Set(v, "sync", dyn.V(dyn.NewMapping())) + if err != nil { + return dyn.InvalidValue, err + } + v, err = dyn.Set(v, "sync.paths", tcase.paths) + if err != nil { + return dyn.InvalidValue, err + } + return v, nil + }) + return diag.FromErr(err) + }) + require.NoError(t, diags.Error()) + + ctx := context.Background() + diags = bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + + // If the sync paths field is already set, do nothing. + assert.Equal(t, tcase.expect, b.Config.Sync.Paths) + }) + } +} diff --git a/bundle/config/mutator/sync_infer_root.go b/bundle/config/mutator/sync_infer_root.go new file mode 100644 index 00000000..012acf80 --- /dev/null +++ b/bundle/config/mutator/sync_infer_root.go @@ -0,0 +1,120 @@ +package mutator + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" +) + +type syncInferRoot struct{} + +// SyncInferRoot is a mutator that infers the root path of all files to synchronize by looking at the +// paths in the sync configuration. The sync root may be different from the bundle root +// when the user intends to synchronize files outside the bundle root. +// +// The sync root can be equivalent to or an ancestor of the bundle root, but not a descendant. +// That is, the sync root must contain the bundle root. +// +// This mutator requires all sync-related paths and patterns to be relative to the bundle root path. +// This is done by the [RewriteSyncPaths] mutator, which must run before this mutator. 
+func SyncInferRoot() bundle.Mutator { + return &syncInferRoot{} +} + +func (m *syncInferRoot) Name() string { + return "SyncInferRoot" +} + +// computeRoot finds the innermost path that contains the specified path. +// It traverses up the root path until it finds the innermost path. +// If the path does not exist, it returns an empty string. +// +// See "sync_infer_root_internal_test.go" for examples. +func (m *syncInferRoot) computeRoot(path string, root string) string { + for !filepath.IsLocal(path) { + // Break if we have reached the root of the filesystem. + dir := filepath.Dir(root) + if dir == root { + return "" + } + + // Update the sync path as we navigate up the directory tree. + path = filepath.Join(filepath.Base(root), path) + + // Move up the directory tree. + root = dir + } + + return filepath.Clean(root) +} + +func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // Use the bundle root path as the starting point for inferring the sync root path. + bundleRootPath := filepath.Clean(b.RootPath) + + // Infer the sync root path by looking at each one of the sync paths. + // Every sync path must be a descendant of the final sync root path. + syncRootPath := bundleRootPath + for _, path := range b.Config.Sync.Paths { + computedPath := m.computeRoot(path, bundleRootPath) + if computedPath == "" { + continue + } + + // Update sync root path if the computed root path is an ancestor of the current sync root path. + if len(computedPath) < len(syncRootPath) { + syncRootPath = computedPath + } + } + + // The new sync root path can only be an ancestor of the previous root path. + // Compute the relative path from the sync root to the bundle root. + rel, err := filepath.Rel(syncRootPath, bundleRootPath) + if err != nil { + return diag.FromErr(err) + } + + // If during computation of the sync root path we hit the root of the filesystem, + // then one or more of the sync paths are outside the filesystem. + // Check if this happened by verifying that none of the paths escape the root + // when joined with the sync root path. + for i, path := range b.Config.Sync.Paths { + if filepath.IsLocal(filepath.Join(rel, path)) { + continue + } + + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("invalid sync path %q", path), + Locations: b.Config.GetLocations(fmt.Sprintf("sync.paths[%d]", i)), + Paths: []dyn.Path{dyn.NewPath(dyn.Key("sync"), dyn.Key("paths"), dyn.Index(i))}, + }) + } + + if diags.HasError() { + return diags + } + + // Update all paths in the sync configuration to be relative to the sync root. + for i, p := range b.Config.Sync.Paths { + b.Config.Sync.Paths[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Include { + b.Config.Sync.Include[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Exclude { + b.Config.Sync.Exclude[i] = filepath.Join(rel, p) + } + + // Configure the sync root path. 
+ b.SyncRoot = vfs.MustNew(syncRootPath) + b.SyncRootPath = syncRootPath + return nil +} diff --git a/bundle/config/mutator/sync_infer_root_internal_test.go b/bundle/config/mutator/sync_infer_root_internal_test.go new file mode 100644 index 00000000..9ab9c88f --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_internal_test.go @@ -0,0 +1,72 @@ +package mutator + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSyncInferRootInternal_ComputeRoot(t *testing.T) { + s := syncInferRoot{} + + tcases := []struct { + path string + root string + out string + }{ + { + // Test that "." doesn't change the root. + path: ".", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + // Test that a subdirectory doesn't change the root. + path: "sub", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + // Test that a parent directory changes the root. + path: "../common", + root: "/tmp/some/dir", + out: "/tmp/some", + }, + { + // Test that a deeply nested parent directory changes the root. + path: "../../../../../../common", + root: "/tmp/some/dir/that/is/very/deeply/nested", + out: "/tmp/some", + }, + { + // Test that a parent directory changes the root at the filesystem root boundary. + path: "../common", + root: "/tmp", + out: "/", + }, + { + // Test that an invalid parent directory doesn't change the root and returns an empty string. + path: "../common", + root: "/", + out: "", + }, + { + // Test that the returned path is cleaned even if the root doesn't change. + path: "sub", + root: "/tmp/some/../dir", + out: "/tmp/dir", + }, + { + // Test that a relative root path also works. + path: "../common", + root: "foo/bar", + out: "foo", + }, + } + + for _, tc := range tcases { + out := s.computeRoot(tc.path, tc.root) + assert.Equal(t, tc.out, filepath.ToSlash(out)) + } +} diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go new file mode 100644 index 00000000..383e5676 --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -0,0 +1,198 @@ +package mutator_test + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncInferRoot_NominalAbsolute(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some/dir"), b.SyncRootPath) + + // Check that the paths are unchanged. 
+ assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_NominalRelative(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "./some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("some/dir"), b.SyncRootPath) + + // Check that the paths are unchanged. + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ParentDirectory(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{filepath.FromSlash("dir/foo"), filepath.FromSlash("dir/bar")}, b.Config.Sync.Include) + assert.Equal(t, []string{filepath.FromSlash("dir/baz"), filepath.FromSlash("dir/qux")}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ManyParentDirectories(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir/that/is/very/deeply/nested", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../../../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/foo"), + filepath.FromSlash("dir/that/is/very/deeply/nested/bar"), + }, b.Config.Sync.Include) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/baz"), + filepath.FromSlash("dir/that/is/very/deeply/nested/qux"), + }, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_MultiplePaths(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/bundle/root", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "./foo", + "../common", + "./bar", + "../../baz", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. 
+ assert.Equal(t, filepath.FromSlash("bundle/root/foo"), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.FromSlash("bundle/common"), b.Config.Sync.Paths[1]) + assert.Equal(t, filepath.FromSlash("bundle/root/bar"), b.Config.Sync.Paths[2]) + assert.Equal(t, filepath.FromSlash("baz"), b.Config.Sync.Paths[3]) +} + +func TestSyncInferRoot_Error(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../error", + "../../../thisworks", + "../../../../../error", + }, + }, + }, + } + + bundletest.SetLocation(b, "sync.paths", "databricks.yml") + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + require.Len(t, diags, 2) + assert.Equal(t, `invalid sync path "../../../../error"`, diags[0].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[0].Locations[0].String()) + assert.Equal(t, "sync.paths[0]", diags[0].Paths[0].String()) + assert.Equal(t, `invalid sync path "../../../../../error"`, diags[1].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[1].Locations[0].String()) + assert.Equal(t, "sync.paths[2]", diags[1].Paths[0].String()) +} diff --git a/bundle/config/mutator/trampoline.go b/bundle/config/mutator/trampoline.go index dde9a299..dcca5014 100644 --- a/bundle/config/mutator/trampoline.go +++ b/bundle/config/mutator/trampoline.go @@ -82,7 +82,7 @@ func (m *trampoline) generateNotebookWrapper(ctx context.Context, b *bundle.Bund return err } - internalDirRel, err := filepath.Rel(b.RootPath, internalDir) + internalDirRel, err := filepath.Rel(b.SyncRootPath, internalDir) if err != nil { return err } diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go index de395c16..08d3c822 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/config/mutator/trampoline_test.go @@ -56,8 +56,12 @@ func TestGenerateTrampoline(t *testing.T) { } b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Bundle: config.Bundle{ Target: "development", }, @@ -89,6 +93,6 @@ func TestGenerateTrampoline(t *testing.T) { require.Equal(t, "Hello from Trampoline", string(bytes)) task := b.Config.Resources.Jobs["test"].Tasks[0] - require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_test_to_trampoline") + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_test_to_trampoline", task.NotebookTask.NotebookPath) require.Nil(t, task.PythonWheelTask) } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 28f7d3d3..5f22570e 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -93,14 +93,14 @@ func (t *translateContext) rewritePath( return nil } - // Local path must be contained in the bundle root. + // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. 
- localRelPath, err := filepath.Rel(t.b.RootPath, localPath) + localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { return err } if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in bundle root path", localPath) + return fmt.Errorf("path %s is not contained in sync root path", localPath) } // Prefix remote path with its remote root path. @@ -118,7 +118,7 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("notebook %s not found", literal) } @@ -134,7 +134,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe } func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } @@ -148,7 +148,7 @@ func (t *translateContext) translateFilePath(literal, localFullPath, localRelPat } func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.BundleRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) if err != nil { return "", err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index fd64593b..50fcd3b0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -41,8 +41,8 @@ func touchEmptyFile(t *testing.T, path string) { func TestTranslatePathsSkippedWithGitSource(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -113,8 +113,8 @@ func TestTranslatePaths(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "requirements.txt")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -289,8 +289,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "job", "my_dbt_project", "dbt_project.yml")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -380,12 +380,12 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) } -func TestTranslatePathsOutsideBundleRoot(t *testing.T) { +func TestTranslatePathsOutsideSyncRoot(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -411,15 +411,15 @@ func TestTranslatePathsOutsideBundleRoot(t *testing.T) { bundletest.SetLocation(b, ".", filepath.Join(dir, "../resource.yml")) diags := 
bundle.Apply(context.Background(), b, mutator.TranslatePaths()) - assert.ErrorContains(t, diags.Error(), "is not contained in bundle root") + assert.ErrorContains(t, diags.Error(), "is not contained in sync root path") } func TestJobNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -449,8 +449,8 @@ func TestJobFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -480,8 +480,8 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -511,8 +511,8 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -543,8 +543,8 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -578,8 +578,8 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -613,8 +613,8 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -648,8 +648,8 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -684,8 +684,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "env2.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -724,8 +724,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { func TestTranslatePathWithComplexVariables(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Variables: map[string]*variable.Variable{ "cluster_libraries": { diff --git a/bundle/config/sync.go b/bundle/config/sync.go index 0580e4c4..377b1333 100644 --- a/bundle/config/sync.go +++ 
b/bundle/config/sync.go @@ -1,6 +1,10 @@ package config type Sync struct { + // Paths contains a list of paths to synchronize relative to the bundle root path. + // If not configured, this defaults to synchronizing everything in the bundle root path (i.e. `.`). + Paths []string `json:"paths,omitempty"` + // Include contains a list of globs evaluated relative to the bundle root path // to explicitly include files that were excluded by the user's gitignore. Include []string `json:"include,omitempty"` diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index dc45053f..347ed307 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -28,8 +28,8 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalRoot: rb.BundleRoot(), - Paths: []string{"."}, + LocalRoot: rb.SyncRoot(), + Paths: rb.Config().Sync.Paths, Include: includes, Exclude: rb.Config().Sync.Exclude, diff --git a/bundle/deploy/state_pull.go b/bundle/deploy/state_pull.go index 24ed9d36..5e301a6f 100644 --- a/bundle/deploy/state_pull.go +++ b/bundle/deploy/state_pull.go @@ -85,7 +85,7 @@ func (s *statePull) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Creating new snapshot") - snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.BundleRoot), opts) + snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.SyncRoot), opts) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index 38f0b402..f7519306 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -64,6 +64,10 @@ func testStatePull(t *testing.T, opts statePullOpts) { b := &bundle.Bundle{ RootPath: tmpDir, BundleRoot: vfs.MustNew(tmpDir), + + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ Bundle: config.Bundle{ Target: "default", @@ -81,11 +85,11 @@ func testStatePull(t *testing.T, opts statePullOpts) { ctx := context.Background() for _, file := range opts.localFiles { - testutil.Touch(t, b.RootPath, "bar", file) + testutil.Touch(t, b.SyncRootPath, "bar", file) } for _, file := range opts.localNotebooks { - testutil.TouchNotebook(t, b.RootPath, "bar", file) + testutil.TouchNotebook(t, b.SyncRootPath, "bar", file) } if opts.withExistingSnapshot { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 7a1081de..8039a4f1 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -21,7 +21,18 @@ func Initialize() bundle.Mutator { "initialize", []bundle.Mutator{ validate.AllResourcesHaveValues(), + + // Update all path fields in the sync block to be relative to the bundle root path. mutator.RewriteSyncPaths(), + + // Configure the default sync path to equal the bundle root if not explicitly configured. + // By default, this means all files in the bundle root directory are synchronized. + mutator.SyncDefaultPath(), + + // Figure out if the sync root path is identical or an ancestor of the bundle root path. + // If it is an ancestor, this updates all paths to be relative to the sync root path. 
+ mutator.SyncInferRoot(), + mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/python/conditional_transform_test.go b/bundle/python/conditional_transform_test.go index 677970d7..1d397f7a 100644 --- a/bundle/python/conditional_transform_test.go +++ b/bundle/python/conditional_transform_test.go @@ -2,7 +2,6 @@ package python import ( "context" - "path" "path/filepath" "testing" @@ -18,11 +17,15 @@ func TestNoTransformByDefault(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -63,11 +66,15 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -102,14 +109,7 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { task := b.Config.Resources.Jobs["job1"].Tasks[0] require.Nil(t, task.PythonWheelTask) require.NotNil(t, task.NotebookTask) - - dir, err := b.InternalDir(context.Background()) - require.NoError(t, err) - - internalDirRel, err := filepath.Rel(b.RootPath, dir) - require.NoError(t, err) - - require.Equal(t, path.Join(filepath.ToSlash(internalDirRel), "notebook_job1_key1"), task.NotebookTask.NotebookPath) + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_job1_key1", task.NotebookTask.NotebookPath) require.Len(t, task.Libraries, 1) require.Equal(t, "/Workspace/Users/test@test.com/bundle/dist/test.jar", task.Libraries[0].Jar) diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 848132a1..5c48d81c 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -40,6 +40,8 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { diags := bundle.Apply(ctx, b, bundle.Seq( phases.LoadNamedTarget(env), mutator.RewriteSyncPaths(), + mutator.SyncDefaultPath(), + mutator.SyncInferRoot(), mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/tests/sync/paths/databricks.yml b/bundle/tests/sync/paths/databricks.yml new file mode 100644 index 00000000..9ef6fa03 --- /dev/null +++ b/bundle/tests/sync/paths/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - src + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging diff --git a/bundle/tests/sync/paths_no_root/databricks.yml b/bundle/tests/sync/paths_no_root/databricks.yml new file mode 100644 index 00000000..df15b12b --- /dev/null +++ b/bundle/tests/sync/paths_no_root/databricks.yml @@ -0,0 +1,26 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging + + undefined: ~ + + nil: + sync: + paths: ~ + + empty: + sync: + paths: [] diff --git 
a/bundle/tests/sync/shared_code/bundle/databricks.yml b/bundle/tests/sync/shared_code/bundle/databricks.yml new file mode 100644 index 00000000..738b6170 --- /dev/null +++ b/bundle/tests/sync/shared_code/bundle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: shared_code + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - "../common" + - "." diff --git a/bundle/tests/sync/shared_code/common/library.txt b/bundle/tests/sync/shared_code/common/library.txt new file mode 100644 index 00000000..83b32384 --- /dev/null +++ b/bundle/tests/sync/shared_code/common/library.txt @@ -0,0 +1 @@ +Placeholder for files to be deployed as part of multiple bundles. diff --git a/bundle/tests/sync_test.go b/bundle/tests/sync_test.go index d08e889c..15644b67 100644 --- a/bundle/tests/sync_test.go +++ b/bundle/tests/sync_test.go @@ -12,14 +12,20 @@ func TestSyncOverride(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override", "development") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "staging") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "prod") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -28,14 +34,20 @@ func TestSyncOverrideNoRootSync(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "prod") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -44,10 +56,14 @@ func TestSyncNil(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil", "development") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) 
assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } @@ -56,10 +72,59 @@ func TestSyncNilRoot(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil_root", "development") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } + +func TestSyncPaths(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths", "development") + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths", "staging") + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "staging"}, b.Config.Sync.Paths) +} + +func TestSyncPathsNoRoot(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"staging"}, b.Config.Sync.Paths) + + // If not set at all, it defaults to "." + b = loadTarget(t, "./sync/paths_no_root", "undefined") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + + // If set to nil, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "nil") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Len(t, b.Config.Sync.Paths, 0) + + // If set to an empty sequence, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "empty") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Len(t, b.Config.Sync.Paths, 0) +} + +func TestSyncSharedCode(t *testing.T) { + b := loadTarget(t, "./sync/shared_code/bundle", "default") + assert.Equal(t, filepath.FromSlash("sync/shared_code"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"common", "bundle"}, b.Config.Sync.Paths) +} diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index 0d0c5738..bd03eec9 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -17,8 +17,10 @@ import ( func TestSyncOptionsFromBundle(t *testing.T) { tempDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tempDir, - BundleRoot: vfs.MustNew(tempDir), + RootPath: tempDir, + BundleRoot: vfs.MustNew(tempDir), + SyncRootPath: tempDir, + SyncRoot: vfs.MustNew(tempDir), Config: config.Root{ Bundle: config.Bundle{ Target: "default", From 35e48be81c634e50164edcfb086c362e948ca57e Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 22 Aug 2024 10:44:22 +0200 Subject: [PATCH 10/37] [Release] Release v0.227.0 (#1705) CLI: * Added filtering flags for cluster list commands ([#1703](https://github.com/databricks/cli/pull/1703)). 
Bundles: * Remove reference to "dbt" in the default-sql template ([#1696](https://github.com/databricks/cli/pull/1696)). * Pause continuous pipelines when 'mode: development' is used ([#1590](https://github.com/databricks/cli/pull/1590)). * Add configurable presets for name prefixes, tags, etc. ([#1490](https://github.com/databricks/cli/pull/1490)). * Report all empty resources present in error diagnostic ([#1685](https://github.com/databricks/cli/pull/1685)). * Improves detection of PyPI package names in environment dependencies ([#1699](https://github.com/databricks/cli/pull/1699)). * [DAB] Add support for requirements libraries in Job Tasks ([#1543](https://github.com/databricks/cli/pull/1543)). * Add paths field to bundle sync configuration ([#1694](https://github.com/databricks/cli/pull/1694)). Internal: * Add `import` option for PyDABs ([#1693](https://github.com/databricks/cli/pull/1693)). * Make fileset take optional list of paths to list ([#1684](https://github.com/databricks/cli/pull/1684)). * Pass through paths argument to libs/sync ([#1689](https://github.com/databricks/cli/pull/1689)). * Correctly mark package names with versions as remote libraries ([#1697](https://github.com/databricks/cli/pull/1697)). * Share test initializer in common helper function ([#1695](https://github.com/databricks/cli/pull/1695)). * Make `pydabs/venv_path` optional ([#1687](https://github.com/databricks/cli/pull/1687)). * Use API mocks for duplicate path errors in workspace files extensions client ([#1690](https://github.com/databricks/cli/pull/1690)). * Fix prefix preset used for UC schemas ([#1704](https://github.com/databricks/cli/pull/1704)). --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39960e30..88a62d09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Version changelog +## [Release] Release v0.227.0 + +CLI: + * Added filtering flags for cluster list commands ([#1703](https://github.com/databricks/cli/pull/1703)). + +Bundles: + * Allow users to configure paths (including outside of the bundle root) to synchronize to the workspace. ([#1694](https://github.com/databricks/cli/pull/1694)). + * Add configurable presets for name prefixes, tags, etc. ([#1490](https://github.com/databricks/cli/pull/1490)). + * Add support for requirements libraries in Job Tasks ([#1543](https://github.com/databricks/cli/pull/1543)). + * Remove reference to "dbt" in the default-sql template ([#1696](https://github.com/databricks/cli/pull/1696)). + * Pause continuous pipelines when 'mode: development' is used ([#1590](https://github.com/databricks/cli/pull/1590)). + * Report all empty resources present in error diagnostic ([#1685](https://github.com/databricks/cli/pull/1685)). + * Improves detection of PyPI package names in environment dependencies ([#1699](https://github.com/databricks/cli/pull/1699)). + +Internal: + * Add `import` option for PyDABs ([#1693](https://github.com/databricks/cli/pull/1693)). + * Make fileset take optional list of paths to list ([#1684](https://github.com/databricks/cli/pull/1684)). + * Pass through paths argument to libs/sync ([#1689](https://github.com/databricks/cli/pull/1689)). + * Correctly mark package names with versions as remote libraries ([#1697](https://github.com/databricks/cli/pull/1697)). + * Share test initializer in common helper function ([#1695](https://github.com/databricks/cli/pull/1695)). 
+ * Make `pydabs/venv_path` optional ([#1687](https://github.com/databricks/cli/pull/1687)). + * Use API mocks for duplicate path errors in workspace files extensions client ([#1690](https://github.com/databricks/cli/pull/1690)). + * Fix prefix preset used for UC schemas ([#1704](https://github.com/databricks/cli/pull/1704)). + + + ## [Release] Release v0.226.0 CLI: From 7fe08c2386edfa503985d93b1e6f633aa85e4f74 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 22 Aug 2024 20:34:26 +0530 Subject: [PATCH 11/37] Revert hc-install version to 0.7.0 (#1711) ## Changes With hc-install version `0.8.0` there was a regression where debug logs would be leaked into stderr. Reported upstream in https://github.com/hashicorp/hc-install/issues/239. Meanwhile we need to revert and pin to version`0.7.0`. This PR also includes a regression test. ## Tests Regression test. --- go.mod | 3 +-- go.sum | 8 ++------ internal/bundle/deploy_test.go | 31 +++++++++++++++++++++++++++++++ internal/bundle/helpers.go | 29 +++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 1457a4d6..838a45f3 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 - github.com/hashicorp/hc-install v0.8.0 // MPL 2.0 + github.com/hashicorp/hc-install v0.7.0 // MPL 2.0 github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 github.com/hashicorp/terraform-json v0.22.1 // MPL 2.0 github.com/manifoldco/promptui v0.9.0 // BSD-3-Clause @@ -49,7 +49,6 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect - github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect diff --git a/go.sum b/go.sum index b2985955..f55f329f 100644 --- a/go.sum +++ b/go.sum @@ -99,14 +99,10 @@ github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= -github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= -github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= -github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/hc-install v0.8.0 h1:LdpZeXkZYMQhoKPCecJHlKvUkQFixN/nvyR1CdfOLjI= -github.com/hashicorp/hc-install v0.8.0/go.mod h1:+MwJYjDfCruSD/udvBmRB22Nlkwwkwf5sAB6uTIhSaU= +github.com/hashicorp/hc-install v0.7.0 h1:Uu9edVqjKQxxuD28mR5TikkKDd/p55S8vzPC1659aBk= +github.com/hashicorp/hc-install v0.7.0/go.mod h1:ELmmzZlGnEcqoUMKUuykHaPCIR1sYLYX+KSggWSKZuA= github.com/hashicorp/terraform-exec v0.21.0 
h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= github.com/hashicorp/terraform-json v0.22.1 h1:xft84GZR0QzjPVWs4lRUwvTcPnegqlyS7orfb5Ltvec= diff --git a/internal/bundle/deploy_test.go b/internal/bundle/deploy_test.go index 3da88570..269b7c80 100644 --- a/internal/bundle/deploy_test.go +++ b/internal/bundle/deploy_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal" "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/libs/env" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -123,3 +124,33 @@ func TestAccBundleDeployUcSchemaFailsWithoutAutoApprove(t *testing.T) { assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") } + +func TestAccDeployBasicBundleLogs(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + root, err := initTestTemplate(t, ctx, "basic", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + t.Cleanup(func() { + err = destroyBundle(t, ctx, root) + require.NoError(t, err) + }) + + currentUser, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + stdout, stderr := blackBoxRun(t, root, "bundle", "deploy") + assert.Equal(t, strings.Join([]string{ + fmt.Sprintf("Uploading bundle files to /Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId), + "Deploying resources...", + "Updating deployment state...", + "Deployment complete!\n", + }, "\n"), stderr) + assert.Equal(t, "", stdout) +} diff --git a/internal/bundle/helpers.go b/internal/bundle/helpers.go index 03d9cff7..3547c175 100644 --- a/internal/bundle/helpers.go +++ b/internal/bundle/helpers.go @@ -1,10 +1,12 @@ package bundle import ( + "bytes" "context" "encoding/json" "fmt" "os" + "os/exec" "path/filepath" "strings" "testing" @@ -15,6 +17,7 @@ import ( "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/template" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" ) @@ -114,3 +117,29 @@ func getBundleRemoteRootPath(w *databricks.WorkspaceClient, t *testing.T, unique root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId) return root } + +func blackBoxRun(t *testing.T, root string, args ...string) (stdout string, stderr string) { + cwd := vfs.MustNew(".") + gitRoot, err := vfs.FindLeafInTree(cwd, ".git") + require.NoError(t, err) + + t.Setenv("BUNDLE_ROOT", root) + + // Create the command + cmd := exec.Command("go", append([]string{"run", "main.go"}, args...)...) 
+	cmd.Dir = gitRoot.Native()
+
+	// Create buffers to capture output
+	var outBuffer, errBuffer bytes.Buffer
+	cmd.Stdout = &outBuffer
+	cmd.Stderr = &errBuffer
+
+	// Run the command
+	err = cmd.Run()
+	require.NoError(t, err)
+
+	// Get the output
+	stdout = outBuffer.String()
+	stderr = errBuffer.String()
+	return
+}

From 84b47745e451f6552465243665ad6c897c55ae5e Mon Sep 17 00:00:00 2001
From: "Lennart Kats (databricks)"
Date: Fri, 23 Aug 2024 12:13:21 +0200
Subject: [PATCH 12/37] Ignore CLI version check on development builds of the
 CLI (#1714)

## Changes
This change makes sure we ignore the CLI version check on development
builds of the CLI.

Before:
```
$ cat databricks.yml | grep cli_version
  databricks_cli_version: ">= 0.223.1"
$ cli bundle deploy
Error: Databricks CLI version constraint not satisfied. Required: >= 0.223.1, current: 0.0.0-dev+06b169284737
```

After:
```
...
$ cli bundle deploy
...
Warning: Ignoring Databricks CLI version constraint for development build. Required: >= 0.223.1, current: 0.0.0-dev+d52d6f08fcd5
```

## Tests

---
 bundle/config/mutator/verify_cli_version.go      | 4 ++++
 bundle/config/mutator/verify_cli_version_test.go | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/bundle/config/mutator/verify_cli_version.go b/bundle/config/mutator/verify_cli_version.go
index 9c32fcc9..279af44e 100644
--- a/bundle/config/mutator/verify_cli_version.go
+++ b/bundle/config/mutator/verify_cli_version.go
@@ -40,6 +40,10 @@ func (v *verifyCliVersion) Apply(ctx context.Context, b *bundle.Bundle) diag.Dia
 	}
 
 	if !c.Check(version) {
+		if version.Prerelease() == "dev" && version.Major() == 0 {
+			return diag.Warningf("Ignoring Databricks CLI version constraint for development build. Required: %s, current: %s", constraint, currentVersion)
+		}
+
 		return diag.Errorf("Databricks CLI version constraint not satisfied. Required: %s, current: %s", constraint, currentVersion)
 	}
 
diff --git a/bundle/config/mutator/verify_cli_version_test.go b/bundle/config/mutator/verify_cli_version_test.go
index 24f65674..02546129 100644
--- a/bundle/config/mutator/verify_cli_version_test.go
+++ b/bundle/config/mutator/verify_cli_version_test.go
@@ -107,6 +107,11 @@ func TestVerifyCliVersion(t *testing.T) {
 			constraint:    "^0.100",
 			expectedError: "invalid version constraint \"^0.100\" specified. Please specify the version constraint in the format (>=) 0.0.0(, <= 1.0.0)",
 		},
+		{
+			currentVersion: "0.0.0-dev+06b169284737",
+			constraint:     ">= 0.100.0",
+			expectedError:  "Ignoring Databricks CLI version constraint for development build. Required: >= 0.100.0",
+		},
 	}
 
 	t.Cleanup(func() {
@@ -130,7 +135,7 @@ func TestVerifyCliVersion(t *testing.T) {
 		diags := bundle.Apply(context.Background(), b, VerifyCliVersion())
 		if tc.expectedError != "" {
 			require.NotEmpty(t, diags)
-			require.Equal(t, tc.expectedError, diags.Error().Error())
+			require.Contains(t, diags[0].Summary, tc.expectedError)
 		} else {
 			require.Empty(t, diags)
 		}

From 783e05c939a694fe722e52ddea9c48f0ea077181 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Mon, 26 Aug 2024 12:03:56 +0200
Subject: [PATCH 13/37] Do not treat empty path as a local path (#1717)

## Changes
Fixes an issue introduced in https://github.com/databricks/cli/pull/1699
where PyPi packages were treated as local libraries.

The reason is that `libraryPath` returned an empty string as the path for
PyPi packages, and `IsLibraryLocal` then treated the empty string as a
local path.

Both of these functions are fixed in this PR.
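To illustrate the combined effect of the two fixes, here is a minimal,
self-contained Go sketch (not code from this patch: it uses a simplified
stand-in for the SDK's `compute.Library` type and elides the real
prefix/extension checks inside `IsLibraryLocal`):

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// library is a simplified stand-in for the SDK's compute.Library type.
type library struct {
	Whl  string
	Jar  string
	Pypi string // PyPI package spec, e.g. "requests==2.25.1"; it has no file path
}

// libraryPath mirrors the fixed signature: library types without a file
// path now yield an error instead of an empty string.
func libraryPath(l *library) (string, error) {
	if l.Whl != "" {
		return l.Whl, nil
	}
	if l.Jar != "" {
		return l.Jar, nil
	}
	return "", errors.New("unsupported library type")
}

// isLibraryLocal sketches the second fix: an empty dependency string is
// never a local path. (The real prefix/extension checks are elided.)
func isLibraryLocal(dep string) bool {
	if dep == "" {
		return false
	}
	return strings.HasPrefix(dep, ".")
}

func main() {
	libs := []*library{
		{Whl: "./dist/test.whl"},    // local wheel, detected as local
		{Pypi: "requests==2.25.1"},  // PyPI package, no path: skipped
	}
	for _, l := range libs {
		p, err := libraryPath(l)
		if err != nil {
			// Before the fix, libraryPath returned "" here and the empty
			// string was misclassified as a local path.
			fmt.Printf("%+v: skipped (%v)\n", *l, err)
			continue
		}
		fmt.Printf("%s: local=%v\n", p, isLibraryLocal(p))
	}
}
```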
## Tests
Added regression test

---
 bundle/libraries/helpers.go         | 19 +++++++----
 bundle/libraries/helpers_test.go    | 28 +++++++++++++---
 bundle/libraries/libraries.go       |  7 +++-
 bundle/libraries/local_path.go      |  4 +++
 bundle/libraries/local_path_test.go |  1 +
 bundle/libraries/workspace_path.go  |  4 +--
 bundle/python/warning_test.go       | 51 +++++++++++++++++++++++++++++
 7 files changed, 99 insertions(+), 15 deletions(-)

diff --git a/bundle/libraries/helpers.go b/bundle/libraries/helpers.go
index b7e707cc..2149e588 100644
--- a/bundle/libraries/helpers.go
+++ b/bundle/libraries/helpers.go
@@ -1,19 +1,24 @@
 package libraries
 
-import "github.com/databricks/databricks-sdk-go/service/compute"
+import (
+	"fmt"
 
-func libraryPath(library *compute.Library) string {
+	"github.com/databricks/databricks-sdk-go/service/compute"
+)
+
+func libraryPath(library *compute.Library) (string, error) {
 	if library.Whl != "" {
-		return library.Whl
+		return library.Whl, nil
 	}
 	if library.Jar != "" {
-		return library.Jar
+		return library.Jar, nil
 	}
 	if library.Egg != "" {
-		return library.Egg
+		return library.Egg, nil
 	}
 	if library.Requirements != "" {
-		return library.Requirements
+		return library.Requirements, nil
 	}
-	return ""
+
+	return "", fmt.Errorf("unsupported library type")
 }

diff --git a/bundle/libraries/helpers_test.go b/bundle/libraries/helpers_test.go
index e4bd3277..9d7e12ee 100644
--- a/bundle/libraries/helpers_test.go
+++ b/bundle/libraries/helpers_test.go
@@ -10,9 +10,27 @@ import (
 func TestLibraryPath(t *testing.T) {
 	path := "/some/path"
 
-	assert.Equal(t, path, libraryPath(&compute.Library{Whl: path}))
-	assert.Equal(t, path, libraryPath(&compute.Library{Jar: path}))
-	assert.Equal(t, path, libraryPath(&compute.Library{Egg: path}))
-	assert.Equal(t, path, libraryPath(&compute.Library{Requirements: path}))
-	assert.Equal(t, "", libraryPath(&compute.Library{}))
+	p, err := libraryPath(&compute.Library{Whl: path})
+	assert.Equal(t, path, p)
+	assert.Nil(t, err)
+
+	p, err = libraryPath(&compute.Library{Jar: path})
+	assert.Equal(t, path, p)
+	assert.Nil(t, err)
+
+	p, err = libraryPath(&compute.Library{Egg: path})
+	assert.Equal(t, path, p)
+	assert.Nil(t, err)
+
+	p, err = libraryPath(&compute.Library{Requirements: path})
+	assert.Equal(t, path, p)
+	assert.Nil(t, err)
+
+	p, err = libraryPath(&compute.Library{})
+	assert.Equal(t, "", p)
+	assert.NotNil(t, err)
+
+	p, err = libraryPath(&compute.Library{Pypi: &compute.PythonPyPiLibrary{Package: "pypipackage"}})
+	assert.Equal(t, "", p)
+	assert.NotNil(t, err)
 }

diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go
index 33b848dd..f75e23a8 100644
--- a/bundle/libraries/libraries.go
+++ b/bundle/libraries/libraries.go
@@ -67,7 +67,12 @@ func FindTasksWithLocalLibraries(b *bundle.Bundle) []jobs.Task {
 func isTaskWithLocalLibraries(task jobs.Task) bool {
 	for _, l := range task.Libraries {
-		if IsLibraryLocal(libraryPath(&l)) {
+		p, err := libraryPath(&l)
+		// If there's an error, skip the library because it's not of a supported type
+		if err != nil {
+			continue
+		}
+		if IsLibraryLocal(p) {
 			return true
 		}
 	}

diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go
index 417bce10..e4956240 100644
--- a/bundle/libraries/local_path.go
+++ b/bundle/libraries/local_path.go
@@ -43,6 +43,10 @@ func IsLocalPath(p string) bool {
 // We can't use IsLocalPath because environment dependencies can be
 // a pypi package name which can be misinterpreted as a local path by IsLocalPath.
func IsLibraryLocal(dep string) bool { + if dep == "" { + return false + } + possiblePrefixes := []string{ ".", } diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 7f84b324..667d64ec 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -48,6 +48,7 @@ func TestIsLibraryLocal(t *testing.T) { {path: "../../local/*.whl", expected: true}, {path: "..\\..\\local\\*.whl", expected: true}, {path: "file://path/to/package/whl.whl", expected: true}, + {path: "", expected: false}, {path: "pypipackage", expected: false}, {path: "/Volumes/catalog/schema/volume/path.whl", expected: false}, {path: "/Workspace/my_project/dist.whl", expected: false}, diff --git a/bundle/libraries/workspace_path.go b/bundle/libraries/workspace_path.go index b08ca161..126ad3f1 100644 --- a/bundle/libraries/workspace_path.go +++ b/bundle/libraries/workspace_path.go @@ -29,8 +29,8 @@ func IsWorkspacePath(path string) bool { // IsWorkspaceLibrary returns true if the specified library refers to a workspace path. func IsWorkspaceLibrary(library *compute.Library) bool { - path := libraryPath(library) - if path == "" { + path, err := libraryPath(library) + if err != nil { return false } diff --git a/bundle/python/warning_test.go b/bundle/python/warning_test.go index dd6397f7..b2296392 100644 --- a/bundle/python/warning_test.go +++ b/bundle/python/warning_test.go @@ -223,6 +223,17 @@ func TestNoIncompatibleWheelTasks(t *testing.T) { {Whl: "./dist/test.whl"}, }, }, + { + TaskKey: "key7", + PythonWheelTask: &jobs.PythonWheelTask{}, + ExistingClusterId: "test-key-2", + Libraries: []compute.Library{ + {Whl: "signol_lib-0.4.4-20240822+prod-py3-none-any.whl"}, + {Pypi: &compute.PythonPyPiLibrary{ + Package: "requests==2.25.1", + }}, + }, + }, }, }, }, @@ -241,6 +252,46 @@ func TestNoIncompatibleWheelTasks(t *testing.T) { require.False(t, hasIncompatibleWheelTasks(context.Background(), b)) } +func TestTasksWithPyPiPackageAreCompatible(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": { + JobSettings: &jobs.JobSettings{ + JobClusters: []jobs.JobCluster{ + { + JobClusterKey: "cluster1", + NewCluster: compute.ClusterSpec{ + SparkVersion: "12.2.x-scala2.12", + }, + }, + }, + Tasks: []jobs.Task{ + { + TaskKey: "key1", + PythonWheelTask: &jobs.PythonWheelTask{}, + ExistingClusterId: "test-key-2", + Libraries: []compute.Library{ + {Pypi: &compute.PythonPyPiLibrary{ + Package: "requests==2.25.1", + }}, + }, + }, + }, + }, + }, + }, + }, + }, + } + + m := mocks.NewMockWorkspaceClient(t) + b.SetWorkpaceClient(m.WorkspaceClient) + + require.False(t, hasIncompatibleWheelTasks(context.Background(), b)) +} + func TestNoWarningWhenPythonWheelWrapperIsOn(t *testing.T) { b := &bundle.Bundle{ Config: config.Root{ From 056d2032368ead1e3a7e65f9304508498bc53403 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:54:05 +0200 Subject: [PATCH 14/37] Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 (#1719) Bumps [github.com/databricks/databricks-sdk-go](https://github.com/databricks/databricks-sdk-go) from 0.44.0 to 0.45.0.
Release notes

Sourced from github.com/databricks/databricks-sdk-go's releases.

v0.45.0

0.45.0

Bug Fixes

  • Add INVALID_STATE to error code mapping (#1014).
  • Do not specify --tenant flag when fetching managed identity access token from the CLI (#1021).

Internal Changes

  • Add terraform aliases to Entity (#1017).
  • Added Service.NamedIdMap (#1016).
  • Fix billing test for budget configuration update (#1019).

API Changes:

OpenAPI SHA: 3eae49b444cac5a0118a3503e5b7ecef7f96527a, Date: 2024-08-21

Commits
  • 6d86788 [Release] Release v0.45.0 (#1023)
  • ba4489b [Fix] Do not specify --tenant flag when fetching managed identity access to...
  • f624809 [Internal] Fix billing test for budget configuration update (#1019)
  • 27a5055 [Internal] Add terraform aliases to Entity (#1017)
  • 382a38d [Internal] Added Service.NamedIdMap (#1016)
  • 1ef9931 [Fix] Add INVALID_STATE to error code mapping (#1014)
  • See full diff in compare view

Most Recent Ignore Conditions Applied to This Pull Request

| Dependency Name | Ignore Conditions |
| --- | --- |
| github.com/databricks/databricks-sdk-go | [>= 0.28.a, < 0.29] |
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/databricks/databricks-sdk-go&package-manager=go_modules&previous-version=0.44.0&new-version=0.45.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't
alter it yourself. You can also trigger a rebase manually by commenting
`@dependabot rebase`.

---
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester --- .codegen/_openapi_sha | 2 +- .gitattributes | 3 + bundle/schema/docs/bundle_descriptions.json | 64 +++++ cmd/workspace/cmd.go | 6 + .../external-locations/external-locations.go | 2 + .../policy-compliance-for-clusters.go | 260 +++++++++++++++++ .../policy-compliance-for-jobs.go | 262 ++++++++++++++++++ cmd/workspace/query-history/query-history.go | 8 +- .../resource-quotas/resource-quotas.go | 168 +++++++++++ go.mod | 2 +- go.sum | 4 +- 11 files changed, 773 insertions(+), 8 deletions(-) create mode 100755 cmd/workspace/policy-compliance-for-clusters/policy-compliance-for-clusters.go create mode 100755 cmd/workspace/policy-compliance-for-jobs/policy-compliance-for-jobs.go create mode 100755 cmd/workspace/resource-quotas/resource-quotas.go diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index fef6f268..8b01a242 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -f98c07f9c71f579de65d2587bb0292f83d10e55d \ No newline at end of file +3eae49b444cac5a0118a3503e5b7ecef7f96527a \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index bdb3f398..d82ab769 100755 --- a/.gitattributes +++ b/.gitattributes @@ -75,6 +75,8 @@ cmd/workspace/online-tables/online-tables.go linguist-generated=true cmd/workspace/permission-migration/permission-migration.go linguist-generated=true cmd/workspace/permissions/permissions.go linguist-generated=true cmd/workspace/pipelines/pipelines.go linguist-generated=true +cmd/workspace/policy-compliance-for-clusters/policy-compliance-for-clusters.go linguist-generated=true +cmd/workspace/policy-compliance-for-jobs/policy-compliance-for-jobs.go linguist-generated=true cmd/workspace/policy-families/policy-families.go linguist-generated=true cmd/workspace/provider-exchange-filters/provider-exchange-filters.go linguist-generated=true cmd/workspace/provider-exchanges/provider-exchanges.go linguist-generated=true @@ -94,6 +96,7 @@ cmd/workspace/recipient-activation/recipient-activation.go linguist-generated=tr cmd/workspace/recipients/recipients.go linguist-generated=true cmd/workspace/registered-models/registered-models.go linguist-generated=true cmd/workspace/repos/repos.go linguist-generated=true +cmd/workspace/resource-quotas/resource-quotas.go linguist-generated=true cmd/workspace/restrict-workspace-admins/restrict-workspace-admins.go linguist-generated=true cmd/workspace/schemas/schemas.go linguist-generated=true cmd/workspace/secrets/secrets.go linguist-generated=true diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index d888b366..908a1c2b 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -85,6 +85,12 @@ "enabled": { "description": "" }, + "import": { + "description": "", + "items": { + "description": "" + } + }, "venv_path": { "description": "" } @@ -130,6 +136,29 @@ } } }, + "presets": { + "description": "", + "properties": { + "jobs_max_concurrent_runs": { + "description": "" + }, + "name_prefix": { + "description": "" + }, + "pipelines_development": { + "description": "" + }, + "tags": { + "description": "", + "additionalproperties": { + "description": "" + } + }, + "trigger_pause_status": { + "description": "" + } + } + }, "resources": { "description": "Collection of Databricks resources to deploy.", "properties": { @@ -3079,6 +3108,12 @@ "items": 
{ "description": "" } + }, + "paths": { + "description": "", + "items": { + "description": "" + } } } }, @@ -3202,6 +3237,29 @@ } } }, + "presets": { + "description": "", + "properties": { + "jobs_max_concurrent_runs": { + "description": "" + }, + "name_prefix": { + "description": "" + }, + "pipelines_development": { + "description": "" + }, + "tags": { + "description": "", + "additionalproperties": { + "description": "" + } + }, + "trigger_pause_status": { + "description": "" + } + } + }, "resources": { "description": "Collection of Databricks resources to deploy.", "properties": { @@ -6151,6 +6209,12 @@ "items": { "description": "" } + }, + "paths": { + "description": "", + "items": { + "description": "" + } } } }, diff --git a/cmd/workspace/cmd.go b/cmd/workspace/cmd.go index 75664c79..11be8077 100755 --- a/cmd/workspace/cmd.go +++ b/cmd/workspace/cmd.go @@ -44,6 +44,8 @@ import ( permission_migration "github.com/databricks/cli/cmd/workspace/permission-migration" permissions "github.com/databricks/cli/cmd/workspace/permissions" pipelines "github.com/databricks/cli/cmd/workspace/pipelines" + policy_compliance_for_clusters "github.com/databricks/cli/cmd/workspace/policy-compliance-for-clusters" + policy_compliance_for_jobs "github.com/databricks/cli/cmd/workspace/policy-compliance-for-jobs" policy_families "github.com/databricks/cli/cmd/workspace/policy-families" provider_exchange_filters "github.com/databricks/cli/cmd/workspace/provider-exchange-filters" provider_exchanges "github.com/databricks/cli/cmd/workspace/provider-exchanges" @@ -63,6 +65,7 @@ import ( recipients "github.com/databricks/cli/cmd/workspace/recipients" registered_models "github.com/databricks/cli/cmd/workspace/registered-models" repos "github.com/databricks/cli/cmd/workspace/repos" + resource_quotas "github.com/databricks/cli/cmd/workspace/resource-quotas" schemas "github.com/databricks/cli/cmd/workspace/schemas" secrets "github.com/databricks/cli/cmd/workspace/secrets" service_principals "github.com/databricks/cli/cmd/workspace/service-principals" @@ -130,6 +133,8 @@ func All() []*cobra.Command { out = append(out, permission_migration.New()) out = append(out, permissions.New()) out = append(out, pipelines.New()) + out = append(out, policy_compliance_for_clusters.New()) + out = append(out, policy_compliance_for_jobs.New()) out = append(out, policy_families.New()) out = append(out, provider_exchange_filters.New()) out = append(out, provider_exchanges.New()) @@ -149,6 +154,7 @@ func All() []*cobra.Command { out = append(out, recipients.New()) out = append(out, registered_models.New()) out = append(out, repos.New()) + out = append(out, resource_quotas.New()) out = append(out, schemas.New()) out = append(out, secrets.New()) out = append(out, service_principals.New()) diff --git a/cmd/workspace/external-locations/external-locations.go b/cmd/workspace/external-locations/external-locations.go index 8f0dd346..42493fc4 100755 --- a/cmd/workspace/external-locations/external-locations.go +++ b/cmd/workspace/external-locations/external-locations.go @@ -75,6 +75,7 @@ func newCreate() *cobra.Command { cmd.Flags().StringVar(&createReq.AccessPoint, "access-point", createReq.AccessPoint, `The AWS access point to use when accesing s3 for this external location.`) cmd.Flags().StringVar(&createReq.Comment, "comment", createReq.Comment, `User-provided free-form text description.`) // TODO: complex arg: encryption_details + cmd.Flags().BoolVar(&createReq.Fallback, "fallback", createReq.Fallback, `Indicates whether fallback mode is enabled 
for this external location.`) cmd.Flags().BoolVar(&createReq.ReadOnly, "read-only", createReq.ReadOnly, `Indicates whether the external location is read-only.`) cmd.Flags().BoolVar(&createReq.SkipValidation, "skip-validation", createReq.SkipValidation, `Skips validation of the storage credential associated with the external location.`) @@ -347,6 +348,7 @@ func newUpdate() *cobra.Command { cmd.Flags().StringVar(&updateReq.Comment, "comment", updateReq.Comment, `User-provided free-form text description.`) cmd.Flags().StringVar(&updateReq.CredentialName, "credential-name", updateReq.CredentialName, `Name of the storage credential used with this location.`) // TODO: complex arg: encryption_details + cmd.Flags().BoolVar(&updateReq.Fallback, "fallback", updateReq.Fallback, `Indicates whether fallback mode is enabled for this external location.`) cmd.Flags().BoolVar(&updateReq.Force, "force", updateReq.Force, `Force update even if changing url invalidates dependent external tables or mounts.`) cmd.Flags().Var(&updateReq.IsolationMode, "isolation-mode", `Whether the current securable is accessible from all workspaces or a specific set of workspaces. Supported values: [ISOLATION_MODE_ISOLATED, ISOLATION_MODE_OPEN]`) cmd.Flags().StringVar(&updateReq.NewName, "new-name", updateReq.NewName, `New name for the external location.`) diff --git a/cmd/workspace/policy-compliance-for-clusters/policy-compliance-for-clusters.go b/cmd/workspace/policy-compliance-for-clusters/policy-compliance-for-clusters.go new file mode 100755 index 00000000..1274c879 --- /dev/null +++ b/cmd/workspace/policy-compliance-for-clusters/policy-compliance-for-clusters.go @@ -0,0 +1,260 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package policy_compliance_for_clusters + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "policy-compliance-for-clusters", + Short: `The policy compliance APIs allow you to view and manage the policy compliance status of clusters in your workspace.`, + Long: `The policy compliance APIs allow you to view and manage the policy compliance + status of clusters in your workspace. + + A cluster is compliant with its policy if its configuration satisfies all its + policy rules. Clusters could be out of compliance if their policy was updated + after the cluster was last edited. + + The get and list compliance APIs allow you to view the policy compliance + status of a cluster. The enforce compliance API allows you to update a cluster + to be compliant with the current version of its policy.`, + GroupID: "compute", + Annotations: map[string]string{ + "package": "compute", + }, + } + + // Add methods + cmd.AddCommand(newEnforceCompliance()) + cmd.AddCommand(newGetCompliance()) + cmd.AddCommand(newListCompliance()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start enforce-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
+var enforceComplianceOverrides []func( + *cobra.Command, + *compute.EnforceClusterComplianceRequest, +) + +func newEnforceCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var enforceComplianceReq compute.EnforceClusterComplianceRequest + var enforceComplianceJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&enforceComplianceJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Flags().BoolVar(&enforceComplianceReq.ValidateOnly, "validate-only", enforceComplianceReq.ValidateOnly, `If set, previews the changes that would be made to a cluster to enforce compliance but does not update the cluster.`) + + cmd.Use = "enforce-compliance CLUSTER_ID" + cmd.Short = `Enforce cluster policy compliance.` + cmd.Long = `Enforce cluster policy compliance. + + Updates a cluster to be compliant with the current version of its policy. A + cluster can be updated if it is in a RUNNING or TERMINATED state. + + If a cluster is updated while in a RUNNING state, it will be restarted so + that the new attributes can take effect. + + If a cluster is updated while in a TERMINATED state, it will remain + TERMINATED. The next time the cluster is started, the new attributes will + take effect. + + Clusters created by the Databricks Jobs, DLT, or Models services cannot be + enforced by this API. Instead, use the "Enforce job policy compliance" API to + enforce policy compliance on jobs. + + Arguments: + CLUSTER_ID: The ID of the cluster you want to enforce policy compliance on.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + if cmd.Flags().Changed("json") { + err := root.ExactArgs(0)(cmd, args) + if err != nil { + return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'cluster_id' in your JSON input") + } + return nil + } + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + err = enforceComplianceJson.Unmarshal(&enforceComplianceReq) + if err != nil { + return err + } + } + if !cmd.Flags().Changed("json") { + enforceComplianceReq.ClusterId = args[0] + } + + response, err := w.PolicyComplianceForClusters.EnforceCompliance(ctx, enforceComplianceReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range enforceComplianceOverrides { + fn(cmd, &enforceComplianceReq) + } + + return cmd +} + +// start get-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getComplianceOverrides []func( + *cobra.Command, + *compute.GetClusterComplianceRequest, +) + +func newGetCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var getComplianceReq compute.GetClusterComplianceRequest + + // TODO: short flags + + cmd.Use = "get-compliance CLUSTER_ID" + cmd.Short = `Get cluster policy compliance.` + cmd.Long = `Get cluster policy compliance. + + Returns the policy compliance status of a cluster. 
Clusters could be out of + compliance if their policy was updated after the cluster was last edited. + + Arguments: + CLUSTER_ID: The ID of the cluster to get the compliance status` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + getComplianceReq.ClusterId = args[0] + + response, err := w.PolicyComplianceForClusters.GetCompliance(ctx, getComplianceReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getComplianceOverrides { + fn(cmd, &getComplianceReq) + } + + return cmd +} + +// start list-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var listComplianceOverrides []func( + *cobra.Command, + *compute.ListClusterCompliancesRequest, +) + +func newListCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var listComplianceReq compute.ListClusterCompliancesRequest + + // TODO: short flags + + cmd.Flags().IntVar(&listComplianceReq.PageSize, "page-size", listComplianceReq.PageSize, `Use this field to specify the maximum number of results to be returned by the server.`) + cmd.Flags().StringVar(&listComplianceReq.PageToken, "page-token", listComplianceReq.PageToken, `A page token that can be used to navigate to the next page or previous page as returned by next_page_token or prev_page_token.`) + + cmd.Use = "list-compliance POLICY_ID" + cmd.Short = `List cluster policy compliance.` + cmd.Long = `List cluster policy compliance. + + Returns the policy compliance status of all clusters that use a given policy. + Clusters could be out of compliance if their policy was updated after the + cluster was last edited. + + Arguments: + POLICY_ID: Canonical unique identifier for the cluster policy.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + listComplianceReq.PolicyId = args[0] + + response := w.PolicyComplianceForClusters.ListCompliance(ctx, listComplianceReq) + return cmdio.RenderIterator(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
+ for _, fn := range listComplianceOverrides { + fn(cmd, &listComplianceReq) + } + + return cmd +} + +// end service PolicyComplianceForClusters diff --git a/cmd/workspace/policy-compliance-for-jobs/policy-compliance-for-jobs.go b/cmd/workspace/policy-compliance-for-jobs/policy-compliance-for-jobs.go new file mode 100755 index 00000000..d74caa57 --- /dev/null +++ b/cmd/workspace/policy-compliance-for-jobs/policy-compliance-for-jobs.go @@ -0,0 +1,262 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package policy_compliance_for_jobs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "policy-compliance-for-jobs", + Short: `The compliance APIs allow you to view and manage the policy compliance status of jobs in your workspace.`, + Long: `The compliance APIs allow you to view and manage the policy compliance status + of jobs in your workspace. This API currently only supports compliance + controls for cluster policies. + + A job is in compliance if its cluster configurations satisfy the rules of all + their respective cluster policies. A job could be out of compliance if a + cluster policy it uses was updated after the job was last edited. The job is + considered out of compliance if any of its clusters no longer comply with + their updated policies. + + The get and list compliance APIs allow you to view the policy compliance + status of a job. The enforce compliance API allows you to update a job so that + it becomes compliant with all of its policies.`, + GroupID: "jobs", + Annotations: map[string]string{ + "package": "jobs", + }, + } + + // Add methods + cmd.AddCommand(newEnforceCompliance()) + cmd.AddCommand(newGetCompliance()) + cmd.AddCommand(newListCompliance()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start enforce-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var enforceComplianceOverrides []func( + *cobra.Command, + *jobs.EnforcePolicyComplianceRequest, +) + +func newEnforceCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var enforceComplianceReq jobs.EnforcePolicyComplianceRequest + var enforceComplianceJson flags.JsonFlag + + // TODO: short flags + cmd.Flags().Var(&enforceComplianceJson, "json", `either inline JSON string or @path/to/file.json with request body`) + + cmd.Flags().BoolVar(&enforceComplianceReq.ValidateOnly, "validate-only", enforceComplianceReq.ValidateOnly, `If set, previews changes made to the job to comply with its policy, but does not update the job.`) + + cmd.Use = "enforce-compliance JOB_ID" + cmd.Short = `Enforce job policy compliance.` + cmd.Long = `Enforce job policy compliance. + + Updates a job so the job clusters that are created when running the job + (specified in new_cluster) are compliant with the current versions of their + respective cluster policies. All-purpose clusters used in the job will not be + updated. 
+ + Arguments: + JOB_ID: The ID of the job you want to enforce policy compliance on.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + if cmd.Flags().Changed("json") { + err := root.ExactArgs(0)(cmd, args) + if err != nil { + return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide 'job_id' in your JSON input") + } + return nil + } + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + if cmd.Flags().Changed("json") { + err = enforceComplianceJson.Unmarshal(&enforceComplianceReq) + if err != nil { + return err + } + } + if !cmd.Flags().Changed("json") { + _, err = fmt.Sscan(args[0], &enforceComplianceReq.JobId) + if err != nil { + return fmt.Errorf("invalid JOB_ID: %s", args[0]) + } + } + + response, err := w.PolicyComplianceForJobs.EnforceCompliance(ctx, enforceComplianceReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range enforceComplianceOverrides { + fn(cmd, &enforceComplianceReq) + } + + return cmd +} + +// start get-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getComplianceOverrides []func( + *cobra.Command, + *jobs.GetPolicyComplianceRequest, +) + +func newGetCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var getComplianceReq jobs.GetPolicyComplianceRequest + + // TODO: short flags + + cmd.Use = "get-compliance JOB_ID" + cmd.Short = `Get job policy compliance.` + cmd.Long = `Get job policy compliance. + + Returns the policy compliance status of a job. Jobs could be out of compliance + if a cluster policy they use was updated after the job was last edited and + some of its job clusters no longer comply with their updated policies. + + Arguments: + JOB_ID: The ID of the job whose compliance status you are requesting.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + _, err = fmt.Sscan(args[0], &getComplianceReq.JobId) + if err != nil { + return fmt.Errorf("invalid JOB_ID: %s", args[0]) + } + + response, err := w.PolicyComplianceForJobs.GetCompliance(ctx, getComplianceReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getComplianceOverrides { + fn(cmd, &getComplianceReq) + } + + return cmd +} + +// start list-compliance command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. 
+var listComplianceOverrides []func( + *cobra.Command, + *jobs.ListJobComplianceRequest, +) + +func newListCompliance() *cobra.Command { + cmd := &cobra.Command{} + + var listComplianceReq jobs.ListJobComplianceRequest + + // TODO: short flags + + cmd.Flags().IntVar(&listComplianceReq.PageSize, "page-size", listComplianceReq.PageSize, `Use this field to specify the maximum number of results to be returned by the server.`) + cmd.Flags().StringVar(&listComplianceReq.PageToken, "page-token", listComplianceReq.PageToken, `A page token that can be used to navigate to the next page or previous page as returned by next_page_token or prev_page_token.`) + + cmd.Use = "list-compliance POLICY_ID" + cmd.Short = `List job policy compliance.` + cmd.Long = `List job policy compliance. + + Returns the policy compliance status of all jobs that use a given policy. Jobs + could be out of compliance if a cluster policy they use was updated after the + job was last edited and its job clusters no longer comply with the updated + policy. + + Arguments: + POLICY_ID: Canonical unique identifier for the cluster policy.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + listComplianceReq.PolicyId = args[0] + + response := w.PolicyComplianceForJobs.ListCompliance(ctx, listComplianceReq) + return cmdio.RenderIterator(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range listComplianceOverrides { + fn(cmd, &listComplianceReq) + } + + return cmd +} + +// end service PolicyComplianceForJobs diff --git a/cmd/workspace/query-history/query-history.go b/cmd/workspace/query-history/query-history.go index 5155b5cc..bfa013f2 100755 --- a/cmd/workspace/query-history/query-history.go +++ b/cmd/workspace/query-history/query-history.go @@ -16,9 +16,9 @@ var cmdOverrides []func(*cobra.Command) func New() *cobra.Command { cmd := &cobra.Command{ Use: "query-history", - Short: `A service responsible for storing and retrieving the list of queries run against SQL endpoints, serverless compute, and DLT.`, + Short: `A service responsible for storing and retrieving the list of queries run against SQL endpoints and serverless compute.`, Long: `A service responsible for storing and retrieving the list of queries run - against SQL endpoints, serverless compute, and DLT.`, + against SQL endpoints and serverless compute.`, GroupID: "sql", Annotations: map[string]string{ "package": "sql", @@ -53,6 +53,7 @@ func newList() *cobra.Command { // TODO: short flags // TODO: complex arg: filter_by + cmd.Flags().BoolVar(&listReq.IncludeMetrics, "include-metrics", listReq.IncludeMetrics, `Whether to include the query metrics with each query.`) cmd.Flags().IntVar(&listReq.MaxResults, "max-results", listReq.MaxResults, `Limit the number of results returned in one page.`) cmd.Flags().StringVar(&listReq.PageToken, "page-token", listReq.PageToken, `A token that can be used to get the next page of results.`) @@ -60,8 +61,7 @@ func newList() *cobra.Command { cmd.Short = `List Queries.` cmd.Long = `List Queries. 
- List the history of queries through SQL warehouses, serverless compute, and - DLT. + List the history of queries through SQL warehouses, and serverless compute. You can filter by user ID, warehouse ID, status, and time range. Most recently started queries are returned first (up to max_results in request). The diff --git a/cmd/workspace/resource-quotas/resource-quotas.go b/cmd/workspace/resource-quotas/resource-quotas.go new file mode 100755 index 00000000..9a0c3068 --- /dev/null +++ b/cmd/workspace/resource-quotas/resource-quotas.go @@ -0,0 +1,168 @@ +// Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +package resource_quotas + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/spf13/cobra" +) + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var cmdOverrides []func(*cobra.Command) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "resource-quotas", + Short: `Unity Catalog enforces resource quotas on all securable objects, which limits the number of resources that can be created.`, + Long: `Unity Catalog enforces resource quotas on all securable objects, which limits + the number of resources that can be created. Quotas are expressed in terms of + a resource type and a parent (for example, tables per metastore or schemas per + catalog). The resource quota APIs enable you to monitor your current usage and + limits. For more information on resource quotas see the [Unity Catalog + documentation]. + + [Unity Catalog documentation]: https://docs.databricks.com/en/data-governance/unity-catalog/index.html#resource-quotas`, + GroupID: "catalog", + Annotations: map[string]string{ + "package": "catalog", + }, + } + + // Add methods + cmd.AddCommand(newGetQuota()) + cmd.AddCommand(newListQuotas()) + + // Apply optional overrides to this command. + for _, fn := range cmdOverrides { + fn(cmd) + } + + return cmd +} + +// start get-quota command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var getQuotaOverrides []func( + *cobra.Command, + *catalog.GetQuotaRequest, +) + +func newGetQuota() *cobra.Command { + cmd := &cobra.Command{} + + var getQuotaReq catalog.GetQuotaRequest + + // TODO: short flags + + cmd.Use = "get-quota PARENT_SECURABLE_TYPE PARENT_FULL_NAME QUOTA_NAME" + cmd.Short = `Get information for a single resource quota.` + cmd.Long = `Get information for a single resource quota. + + The GetQuota API returns usage information for a single resource quota, + defined as a child-parent pair. This API also refreshes the quota count if it + is out of date. Refreshes are triggered asynchronously. The updated count + might not be returned in the first call. + + Arguments: + PARENT_SECURABLE_TYPE: Securable type of the quota parent. + PARENT_FULL_NAME: Full name of the parent resource. Provide the metastore ID if the parent + is a metastore. + QUOTA_NAME: Name of the quota. 
Follows the pattern of the quota type, with "-quota" + added as a suffix.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(3) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + getQuotaReq.ParentSecurableType = args[0] + getQuotaReq.ParentFullName = args[1] + getQuotaReq.QuotaName = args[2] + + response, err := w.ResourceQuotas.GetQuota(ctx, getQuotaReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. + for _, fn := range getQuotaOverrides { + fn(cmd, &getQuotaReq) + } + + return cmd +} + +// start list-quotas command + +// Slice with functions to override default command behavior. +// Functions can be added from the `init()` function in manually curated files in this directory. +var listQuotasOverrides []func( + *cobra.Command, + *catalog.ListQuotasRequest, +) + +func newListQuotas() *cobra.Command { + cmd := &cobra.Command{} + + var listQuotasReq catalog.ListQuotasRequest + + // TODO: short flags + + cmd.Flags().IntVar(&listQuotasReq.MaxResults, "max-results", listQuotasReq.MaxResults, `The number of quotas to return.`) + cmd.Flags().StringVar(&listQuotasReq.PageToken, "page-token", listQuotasReq.PageToken, `Opaque token for the next page of results.`) + + cmd.Use = "list-quotas" + cmd.Short = `List all resource quotas under a metastore.` + cmd.Long = `List all resource quotas under a metastore. + + ListQuotas returns all quota values under the metastore. There are no SLAs on + the freshness of the counts returned. This API does not trigger a refresh of + quota counts.` + + cmd.Annotations = make(map[string]string) + + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(0) + return check(cmd, args) + } + + cmd.PreRunE = root.MustWorkspaceClient + cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + + response := w.ResourceQuotas.ListQuotas(ctx, listQuotasReq) + return cmdio.RenderIterator(ctx, response) + } + + // Disable completions since they are not applicable. + // Can be overridden by manual implementation in `override.go`. + cmd.ValidArgsFunction = cobra.NoFileCompletions + + // Apply optional overrides to this command. 
+ for _, fn := range listQuotasOverrides { + fn(cmd, &listQuotasReq) + } + + return cmd +} + +// end service ResourceQuotas diff --git a/go.mod b/go.mod index 838a45f3..4aa27992 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.22 require ( github.com/Masterminds/semver/v3 v3.2.1 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 - github.com/databricks/databricks-sdk-go v0.44.0 // Apache 2.0 + github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause diff --git a/go.sum b/go.sum index f55f329f..2e58948a 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg= github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= -github.com/databricks/databricks-sdk-go v0.44.0 h1:9/FZACv4EFQIOYxfwYVKnY7v46xio9FKCw9tpKB2O/s= -github.com/databricks/databricks-sdk-go v0.44.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= +github.com/databricks/databricks-sdk-go v0.45.0 h1:wdx5Wm/ESrahdHeq62WrjLeGjV4r722LLanD8ahI0Mo= +github.com/databricks/databricks-sdk-go v0.45.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= From edc08149d3d6a4943072f23492fce494082edf90 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 27 Aug 2024 14:51:10 +0200 Subject: [PATCH 15/37] Disable prompt for storage-credentials get command (#1723) ## Changes Fixes #1079 --- .codegen/service.go.tmpl | 1 + .../storage-credentials.go | 22 +++++-------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/.codegen/service.go.tmpl b/.codegen/service.go.tmpl index 111745e4..281dfd6e 100644 --- a/.codegen/service.go.tmpl +++ b/.codegen/service.go.tmpl @@ -154,6 +154,7 @@ func new{{.PascalName}}() *cobra.Command { "provider-exchanges delete-listing-from-exchange" "provider-exchanges list-exchanges-for-listing" "provider-exchanges list-listings-for-exchange" + "storage-credentials get" -}} {{- $fullCommandName := (print $serviceName " " .KebabName) -}} {{- $noPrompt := or .IsCrudCreate (in $excludeFromPrompts $fullCommandName) }} diff --git a/cmd/workspace/storage-credentials/storage-credentials.go b/cmd/workspace/storage-credentials/storage-credentials.go index 18656a61..f4ec5eb4 100755 --- a/cmd/workspace/storage-credentials/storage-credentials.go +++ b/cmd/workspace/storage-credentials/storage-credentials.go @@ -241,28 +241,16 @@ func newGet() *cobra.Command { cmd.Annotations = make(map[string]string) + cmd.Args = func(cmd *cobra.Command, args []string) error { + check := root.ExactArgs(1) + return check(cmd, args) + } + cmd.PreRunE = root.MustWorkspaceClient cmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() w := root.WorkspaceClient(ctx) - if len(args) == 0 { - promptSpinner := cmdio.Spinner(ctx) - promptSpinner <- "No NAME argument specified. Loading names for Storage Credentials drop-down." 
- names, err := w.StorageCredentials.StorageCredentialInfoNameToIdMap(ctx, catalog.ListStorageCredentialsRequest{}) - close(promptSpinner) - if err != nil { - return fmt.Errorf("failed to load names for Storage Credentials drop-down. Please manually specify required arguments. Original error: %w", err) - } - id, err := cmdio.Select(ctx, names, "Name of the storage credential") - if err != nil { - return err - } - args = append(args, id) - } - if len(args) != 1 { - return fmt.Errorf("expected to have name of the storage credential") - } getReq.Name = args[0] response, err := w.StorageCredentials.Get(ctx, getReq) From 70363836d5a462851d8d2196eb288d7bdb1ad01d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 13:39:06 +0200 Subject: [PATCH 16/37] Correctly mark PyPI package name specs with multiple specifiers as remote libraries (#1725) Correctly mark pypi package name specs with multiple specifiers as remote libraries Fixes this https://github.com/databricks/cli/issues/1728 --- bundle/libraries/local_path.go | 6 ++++-- bundle/libraries/local_path_test.go | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index e4956240..6d60d56b 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -72,9 +72,11 @@ func IsLibraryLocal(dep string) bool { // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. -// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?): Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// ,?: Optionally matches a comma (,) at the end of the specifier which is used to separate multiple specifiers. +// There can be multiple version specifiers separated by commas or no specifiers. 
// Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ -var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?,?)*$`) func isPackage(name string) bool { if packageRegex.MatchString(name) { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 667d64ec..dc157cad 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -62,6 +62,8 @@ func TestIsLibraryLocal(t *testing.T) { {path: "beautifulsoup4 ~= 4.12.3", expected: false}, {path: "beautifulsoup4[security, tests]", expected: false}, {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "beautifulsoup4>=1.0.0,<2.0.0", expected: false}, + {path: "beautifulsoup4>=1.0.0,~=1.2.0,<2.0.0", expected: false}, {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, From 85459c1963e6e86f3ece1029caae51156f6b87f6 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Wed, 28 Aug 2024 14:14:19 +0200 Subject: [PATCH 17/37] Improve error handling for /Volumes paths in mode: development (#1716) ## Changes * Provide a more helpful error when using an artifact_path based on /Volumes * Allow the use of short_names in /Volumes paths ## Example cases Example of a valid /Volumes artifact_path: * `artifact_path: /Volumes/catalog/schema/${workspace.current_user.short_name}/libs` Example of an invalid /Volumes path (when using `mode: development`): * `artifact_path: /Volumes/catalog/schema/libs` * Resulting error: `artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'` --- bundle/config/mutator/process_target_mode.go | 35 +++++++++++++------ .../mutator/process_target_mode_test.go | 12 ++++++- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 92ed2868..70382f05 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -64,6 +64,7 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) { } func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics p := b.Config.Presets u := b.Config.Workspace.CurrentUser @@ -74,44 +75,56 @@ func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics { // status to UNPAUSED at the level of an individual object, whic hwas // historically allowed.) 
if p.TriggerPauseStatus == config.Unpaused { - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "target with 'mode: development' cannot set trigger pause status to UNPAUSED by default", Locations: []dyn.Location{b.Config.GetLocation("presets.trigger_pause_status")}, - }} + }) } // Make sure this development copy has unique names and paths to avoid conflicts if path := findNonUserPath(b); path != "" { - return diag.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path) + if path == "artifact_path" && strings.HasPrefix(b.Config.Workspace.ArtifactPath, "/Volumes") { + // For Volumes paths we recommend including the current username as a substring + diags = diags.Extend(diag.Errorf("%s should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", path)) + } else { + // For non-Volumes paths recommend simply putting things in the home folder + diags = diags.Extend(diag.Errorf("%s must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'", path)) + } } if p.NamePrefix != "" && !strings.Contains(p.NamePrefix, u.ShortName) && !strings.Contains(p.NamePrefix, u.UserName) { // Resources such as pipelines require a unique name, e.g. '[dev steve] my_pipeline'. // For this reason we require the name prefix to contain the current username; // it's a pitfall for users if they don't include it and later find out that // only a single user can do development deployments. - return diag.Diagnostics{{ + diags = diags.Append(diag.Diagnostic{ Severity: diag.Error, Summary: "prefix should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'", Locations: []dyn.Location{b.Config.GetLocation("presets.name_prefix")}, - }} + }) } - return nil + return diags } +// findNonUserPath finds the first workspace path such as root_path that doesn't +// contain the current username or current user's shortname. 
func findNonUserPath(b *bundle.Bundle) string { - username := b.Config.Workspace.CurrentUser.UserName + containsName := func(path string) bool { + username := b.Config.Workspace.CurrentUser.UserName + shortname := b.Config.Workspace.CurrentUser.ShortName + return strings.Contains(path, username) || strings.Contains(path, shortname) + } - if b.Config.Workspace.RootPath != "" && !strings.Contains(b.Config.Workspace.RootPath, username) { + if b.Config.Workspace.RootPath != "" && !containsName(b.Config.Workspace.RootPath) { return "root_path" } - if b.Config.Workspace.StatePath != "" && !strings.Contains(b.Config.Workspace.StatePath, username) { + if b.Config.Workspace.StatePath != "" && !containsName(b.Config.Workspace.StatePath) { return "state_path" } - if b.Config.Workspace.FilePath != "" && !strings.Contains(b.Config.Workspace.FilePath, username) { + if b.Config.Workspace.FilePath != "" && !containsName(b.Config.Workspace.FilePath) { return "file_path" } - if b.Config.Workspace.ArtifactPath != "" && !strings.Contains(b.Config.Workspace.ArtifactPath, username) { + if b.Config.Workspace.ArtifactPath != "" && !containsName(b.Config.Workspace.ArtifactPath) { return "artifact_path" } return "" diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 1c8671b4..42f1929c 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -230,10 +230,20 @@ func TestValidateDevelopmentMode(t *testing.T) { diags := validateDevelopmentMode(b) require.NoError(t, diags.Error()) + // Test with /Volumes path + b = mockBundle(config.Development) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/lennart/libs" + diags = validateDevelopmentMode(b) + require.NoError(t, diags.Error()) + b.Config.Workspace.ArtifactPath = "/Volumes/catalog/schema/libs" + diags = validateDevelopmentMode(b) + require.ErrorContains(t, diags.Error(), "artifact_path should contain the current username or ${workspace.current_user.short_name} to ensure uniqueness when using 'mode: development'") + // Test with a bundle that has a non-user path + b = mockBundle(config.Development) b.Config.Workspace.RootPath = "/Shared/.bundle/x/y/state" diags = validateDevelopmentMode(b) - require.ErrorContains(t, diags.Error(), "root_path") + require.ErrorContains(t, diags.Error(), "root_path must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'") // Test with a bundle that has an unpaused trigger pause status b = mockBundle(config.Development) From 7dcc791b05905a0ac78345651320969212f0123f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 28 Aug 2024 15:09:07 +0200 Subject: [PATCH 18/37] [Release] Release v0.227.1 (#1729) CLI: * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). Bundles: * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). Internal: * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). API Changes: * Added `databricks resource-quotas` command group. * Added `databricks policy-compliance-for-clusters` command group. 
* Added `databricks policy-compliance-for-jobs` command group. OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) Dependency updates: * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). --- CHANGELOG.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88a62d09..fac7d597 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Version changelog +## [Release] Release v0.227.1 + +CLI: + * Disable prompt for storage-credentials get command ([#1723](https://github.com/databricks/cli/pull/1723)). + +Bundles: + * Do not treat empty path as a local path ([#1717](https://github.com/databricks/cli/pull/1717)). + * Correctly mark PyPI package name specs with multiple specifiers as remote libraries ([#1725](https://github.com/databricks/cli/pull/1725)). + * Improve error handling for /Volumes paths in mode: development ([#1716](https://github.com/databricks/cli/pull/1716)). + +Internal: + * Ignore CLI version check on development builds of the CLI ([#1714](https://github.com/databricks/cli/pull/1714)). + +API Changes: + * Added `databricks resource-quotas` command group. + * Added `databricks policy-compliance-for-clusters` command group. + * Added `databricks policy-compliance-for-jobs` command group. + +OpenAPI commit 3eae49b444cac5a0118a3503e5b7ecef7f96527a (2024-08-21) +Dependency updates: + * Bump github.com/databricks/databricks-sdk-go from 0.44.0 to 0.45.0 ([#1719](https://github.com/databricks/cli/pull/1719)). + * Revert hc-install version to 0.7.0 ([#1711](https://github.com/databricks/cli/pull/1711)). + ## [Release] Release v0.227.0 CLI: From 43ace69bb955a445885d4477a55de88c237a05ba Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 29 Aug 2024 14:47:44 +0200 Subject: [PATCH 19/37] Consider serverless clusters as compatible for Python wheel tasks (#1733) ## Changes Consider serverless clusters as compatible for Python wheel tasks. 
Fixes a `Python wheel tasks require compute with DBR 13.3+ to include local libraries` warning shown for serverless clusters --- bundle/python/warning.go | 20 ++++++++++++++++---- bundle/python/warning_test.go | 4 +++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/bundle/python/warning.go b/bundle/python/warning.go index d53796d7..0e9d8bef 100644 --- a/bundle/python/warning.go +++ b/bundle/python/warning.go @@ -2,6 +2,7 @@ package python import ( "context" + "strconv" "strings" "github.com/databricks/cli/bundle" @@ -38,7 +39,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { tasks := libraries.FindTasksWithLocalLibraries(b) for _, task := range tasks { if task.NewCluster != nil { - if lowerThanExpectedVersion(ctx, task.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(task.NewCluster.SparkVersion) { return true } } @@ -47,7 +48,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { for _, job := range b.Config.Resources.Jobs { for _, cluster := range job.JobClusters { if task.JobClusterKey == cluster.JobClusterKey && cluster.NewCluster.SparkVersion != "" { - if lowerThanExpectedVersion(ctx, cluster.NewCluster.SparkVersion) { + if lowerThanExpectedVersion(cluster.NewCluster.SparkVersion) { return true } } @@ -64,7 +65,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } - if lowerThanExpectedVersion(ctx, version) { + if lowerThanExpectedVersion(version) { return true } } @@ -73,7 +74,7 @@ func hasIncompatibleWheelTasks(ctx context.Context, b *bundle.Bundle) bool { return false } -func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { +func lowerThanExpectedVersion(sparkVersion string) bool { parts := strings.Split(sparkVersion, ".") if len(parts) < 2 { return false @@ -82,6 +83,17 @@ func lowerThanExpectedVersion(ctx context.Context, sparkVersion string) bool { if parts[1][0] == 'x' { // treat versions like 13.x as the very latest minor (13.99) parts[1] = "99" } + + // if any of the version parts are not numbers, we can't compare + // so consider it as compatible version + if _, err := strconv.Atoi(parts[0]); err != nil { + return false + } + + if _, err := strconv.Atoi(parts[1]); err != nil { + return false + } + v := "v" + parts[0] + "." + parts[1] return semver.Compare(v, "v13.1") < 0 } diff --git a/bundle/python/warning_test.go b/bundle/python/warning_test.go index b2296392..a5ab7563 100644 --- a/bundle/python/warning_test.go +++ b/bundle/python/warning_test.go @@ -344,6 +344,8 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { "14.1.x-scala2.12": false, "13.x-snapshot-scala-2.12": false, "13.x-rc-scala-2.12": false, + "client.1.10-scala2.12": false, + "latest-stable-gpu-scala2.11": false, "10.4.x-aarch64-photon-scala2.12": true, "10.4.x-scala2.12": true, "13.0.x-scala2.12": true, @@ -351,7 +353,7 @@ func TestSparkVersionLowerThanExpected(t *testing.T) { } for k, v := range testCases { - result := lowerThanExpectedVersion(context.Background(), k) + result := lowerThanExpectedVersion(k) require.Equal(t, v, result, k) } } From 0f4891f0fef96fab6bffce7572caa2b97615911a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 15:02:34 +0200 Subject: [PATCH 20/37] Add `dyn.Time` to box a timestamp with its original string value (#1732) ## Changes If not explicitly quoted, the YAML loader interprets a value like `2024-08-29` as a timestamp. Such a value is usually intended to be a string instead. 
Our normalization logic was not able to turn a time value back into the original string. This change boxes the time value to include its original string representation. Normalization of one of these values into a string can now use the original input value. ## Tests Unit tests in `libs/dyn/convert`. --- libs/dyn/convert/normalize.go | 2 + libs/dyn/convert/normalize_test.go | 8 ++++ libs/dyn/kind.go | 3 +- libs/dyn/merge/override_test.go | 12 +++--- libs/dyn/time.go | 62 ++++++++++++++++++++++++++++++ libs/dyn/time_test.go | 41 ++++++++++++++++++++ libs/dyn/value.go | 3 +- libs/dyn/value_underlying.go | 7 ++-- libs/dyn/value_underlying_test.go | 2 +- libs/dyn/yamlloader/loader.go | 15 ++------ libs/dyn/yamlsaver/saver.go | 2 +- libs/dyn/yamlsaver/saver_test.go | 9 +++-- 12 files changed, 136 insertions(+), 30 deletions(-) create mode 100644 libs/dyn/time.go create mode 100644 libs/dyn/time_test.go diff --git a/libs/dyn/convert/normalize.go b/libs/dyn/convert/normalize.go index c80a914f..bc80a150 100644 --- a/libs/dyn/convert/normalize.go +++ b/libs/dyn/convert/normalize.go @@ -267,6 +267,8 @@ func (n normalizeOptions) normalizeString(typ reflect.Type, src dyn.Value, path out = strconv.FormatInt(src.MustInt(), 10) case dyn.KindFloat: out = strconv.FormatFloat(src.MustFloat(), 'f', -1, 64) + case dyn.KindTime: + out = src.MustTime().String() case dyn.KindNil: // Return a warning if the field is present but has a null value. return dyn.InvalidValue, diags.Append(nullWarning(dyn.KindString, src, path)) diff --git a/libs/dyn/convert/normalize_test.go b/libs/dyn/convert/normalize_test.go index c2256615..4b2a3c18 100644 --- a/libs/dyn/convert/normalize_test.go +++ b/libs/dyn/convert/normalize_test.go @@ -569,6 +569,14 @@ func TestNormalizeStringFromFloat(t *testing.T) { assert.Equal(t, dyn.NewValue("1.2", vin.Locations()), vout) } +func TestNormalizeStringFromTime(t *testing.T) { + var typ string + vin := dyn.NewValue(dyn.MustTime("2024-08-29"), []dyn.Location{{File: "file", Line: 1, Column: 1}}) + vout, err := Normalize(&typ, vin) + assert.Empty(t, err) + assert.Equal(t, dyn.NewValue("2024-08-29", vin.Locations()), vout) +} + func TestNormalizeStringError(t *testing.T) { var typ string vin := dyn.V(map[string]dyn.Value{"an": dyn.V("error")}) diff --git a/libs/dyn/kind.go b/libs/dyn/kind.go index 9d507fbc..9c0c1442 100644 --- a/libs/dyn/kind.go +++ b/libs/dyn/kind.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) type Kind int @@ -34,7 +33,7 @@ func kindOf(v any) Kind { return KindInt case float32, float64: return KindFloat - case time.Time: + case Time: return KindTime case nil: return KindNil diff --git a/libs/dyn/merge/override_test.go b/libs/dyn/merge/override_test.go index 9d41a526..264c32e5 100644 --- a/libs/dyn/merge/override_test.go +++ b/libs/dyn/merge/override_test.go @@ -83,16 +83,16 @@ func TestOverride_Primitive(t *testing.T) { { name: "time (updated)", state: visitorState{updated: []string{"root"}}, - left: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10001), []dyn.Location{rightLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10001)), []dyn.Location{rightLocation}), }, { name: "time (not updated)", state: visitorState{}, - left: 
dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), - right: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{rightLocation}), - expected: dyn.NewValue(time.UnixMilli(10000), []dyn.Location{leftLocation}), + left: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), + right: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{rightLocation}), + expected: dyn.NewValue(dyn.FromTime(time.UnixMilli(10000)), []dyn.Location{leftLocation}), }, { name: "different types (updated)", diff --git a/libs/dyn/time.go b/libs/dyn/time.go new file mode 100644 index 00000000..b3b3de12 --- /dev/null +++ b/libs/dyn/time.go @@ -0,0 +1,62 @@ +package dyn + +import ( + "fmt" + "time" +) + +// Time represents a time-like primitive value. +// +// It represents a timestamp and includes the original string value +// that was parsed to create the timestamp. This makes it possible +// to coalesce a value that YAML interprets as a timestamp back into +// a string without losing information. +type Time struct { + t time.Time + s string +} + +// NewTime creates a new Time from the given string. +func NewTime(str string) (Time, error) { + // Try a couple of layouts + for _, layout := range []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + } { + t, terr := time.Parse(layout, str) + if terr == nil { + return Time{t: t, s: str}, nil + } + } + + return Time{}, fmt.Errorf("invalid time value: %q", str) +} + +// MustTime creates a new Time from the given string. +// It panics if the string cannot be parsed. +func MustTime(str string) Time { + t, err := NewTime(str) + if err != nil { + panic(err) + } + return t +} + +// FromTime creates a new Time from the given time.Time. +// It uses the RFC3339Nano format for its string representation. +// This guarantees that it can roundtrip into a string without losing information. +func FromTime(t time.Time) Time { + return Time{t: t, s: t.Format(time.RFC3339Nano)} +} + +// Time returns the time.Time value. +func (t Time) Time() time.Time { + return t.t +} + +// String returns the original string value that was parsed to create the timestamp. +func (t Time) String() string { + return t.s +} diff --git a/libs/dyn/time_test.go b/libs/dyn/time_test.go new file mode 100644 index 00000000..653d6855 --- /dev/null +++ b/libs/dyn/time_test.go @@ -0,0 +1,41 @@ +package dyn_test + +import ( + "testing" + "time" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestTimeValid(t *testing.T) { + for _, tc := range []string{ + "2024-08-29", + "2024-01-15T12:34:56.789012345Z", + } { + tm, err := dyn.NewTime(tc) + if assert.NoError(t, err) { + assert.NotEqual(t, time.Time{}, tm.Time()) + assert.Equal(t, tc, tm.String()) + } + } +} + +func TestTimeInvalid(t *testing.T) { + tm, err := dyn.NewTime("invalid") + assert.Error(t, err) + assert.Equal(t, dyn.Time{}, tm) +} + +func TestTimeFromTime(t *testing.T) { + tref := time.Now() + t1 := dyn.FromTime(tref) + + // Verify that the underlying value is the same. + assert.Equal(t, tref, t1.Time()) + + // Verify that the string representation can be used to construct the same. 
+ t2, err := dyn.NewTime(t1.String()) + assert.NoError(t, err) + assert.True(t, t1.Time().Equal(t2.Time())) +} diff --git a/libs/dyn/value.go b/libs/dyn/value.go index 2aed2f6c..81524db7 100644 --- a/libs/dyn/value.go +++ b/libs/dyn/value.go @@ -127,7 +127,8 @@ func (v Value) AsAny() any { case KindFloat: return v.v case KindTime: - return v.v + t := v.v.(Time) + return t.Time() default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.k)) diff --git a/libs/dyn/value_underlying.go b/libs/dyn/value_underlying.go index 2f0f26a1..0a867375 100644 --- a/libs/dyn/value_underlying.go +++ b/libs/dyn/value_underlying.go @@ -2,7 +2,6 @@ package dyn import ( "fmt" - "time" ) // AsMap returns the underlying mapping if this value is a map, @@ -123,14 +122,14 @@ func (v Value) MustFloat() float64 { // AsTime returns the underlying time if this value is a time, // the zero value and false otherwise. -func (v Value) AsTime() (time.Time, bool) { - vv, ok := v.v.(time.Time) +func (v Value) AsTime() (Time, bool) { + vv, ok := v.v.(Time) return vv, ok } // MustTime returns the underlying time if this value is a time, // panics otherwise. -func (v Value) MustTime() time.Time { +func (v Value) MustTime() Time { vv, ok := v.AsTime() if !ok || v.k != KindTime { panic(fmt.Sprintf("expected kind %s, got %s", KindTime, v.k)) diff --git a/libs/dyn/value_underlying_test.go b/libs/dyn/value_underlying_test.go index e35cde58..73baeeeb 100644 --- a/libs/dyn/value_underlying_test.go +++ b/libs/dyn/value_underlying_test.go @@ -143,7 +143,7 @@ func TestValueUnderlyingFloat(t *testing.T) { } func TestValueUnderlyingTime(t *testing.T) { - v := dyn.V(time.Now()) + v := dyn.V(dyn.FromTime(time.Now())) vv1, ok := v.AsTime() assert.True(t, ok) diff --git a/libs/dyn/yamlloader/loader.go b/libs/dyn/yamlloader/loader.go index fbb52b50..c3e8d081 100644 --- a/libs/dyn/yamlloader/loader.go +++ b/libs/dyn/yamlloader/loader.go @@ -5,7 +5,6 @@ import ( "math" "strconv" "strings" - "time" "github.com/databricks/cli/libs/dyn" "gopkg.in/yaml.v3" @@ -207,17 +206,9 @@ func (d *loader) loadScalar(node *yaml.Node, loc dyn.Location) (dyn.Value, error case "!!null": return dyn.NewValue(nil, []dyn.Location{loc}), nil case "!!timestamp": - // Try a couple of layouts - for _, layout := range []string{ - "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. - "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". 
- "2006-1-2 15:4:5.999999999", // space separated with no time zone - "2006-1-2", // date only - } { - t, terr := time.Parse(layout, node.Value) - if terr == nil { - return dyn.NewValue(t, []dyn.Location{loc}), nil - } + t, err := dyn.NewTime(node.Value) + if err == nil { + return dyn.NewValue(t, []dyn.Location{loc}), nil } return dyn.InvalidValue, errorf(loc, "invalid timestamp value: %v", node.Value) default: diff --git a/libs/dyn/yamlsaver/saver.go b/libs/dyn/yamlsaver/saver.go index fe4cfb85..f4c7157f 100644 --- a/libs/dyn/yamlsaver/saver.go +++ b/libs/dyn/yamlsaver/saver.go @@ -129,7 +129,7 @@ func (s *saver) toYamlNodeWithStyle(v dyn.Value, style yaml.Style) (*yaml.Node, case dyn.KindFloat: return &yaml.Node{Kind: yaml.ScalarNode, Value: fmt.Sprint(v.MustFloat()), Style: style}, nil case dyn.KindTime: - return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().UTC().String(), Style: style}, nil + return &yaml.Node{Kind: yaml.ScalarNode, Value: v.MustTime().String(), Style: style}, nil default: // Panic because we only want to deal with known types. panic(fmt.Sprintf("invalid kind: %d", v.Kind())) diff --git a/libs/dyn/yamlsaver/saver_test.go b/libs/dyn/yamlsaver/saver_test.go index 38709010..aa481c20 100644 --- a/libs/dyn/yamlsaver/saver_test.go +++ b/libs/dyn/yamlsaver/saver_test.go @@ -2,10 +2,10 @@ package yamlsaver import ( "testing" - "time" "github.com/databricks/cli/libs/dyn" assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/stretchr/testify/require" "gopkg.in/yaml.v3" ) @@ -45,11 +45,14 @@ func TestMarshalBoolValue(t *testing.T) { } func TestMarshalTimeValue(t *testing.T) { + tm, err := dyn.NewTime("1970-01-01") + require.NoError(t, err) + s := NewSaver() - var timeValue = dyn.V(time.Unix(0, 0)) + var timeValue = dyn.V(tm) v, err := s.toYamlNode(timeValue) assert.NoError(t, err) - assert.Equal(t, "1970-01-01 00:00:00 +0000 UTC", v.Value) + assert.Equal(t, "1970-01-01", v.Value) assert.Equal(t, yaml.ScalarNode, v.Kind) } From 5fac7edcdf5f3e371920c2907a311cd1564bebf8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 29 Aug 2024 16:41:12 +0200 Subject: [PATCH 21/37] Pass along $AZURE_CONFIG_FILE to Terraform process (#1734) ## Changes This ensures that the CLI and Terraform can both use an Azure CLI session configured under a non-standard path. This is the default behavior on Azure DevOps when using the AzureCLI@2 task. Fixes #1722. ## Tests Unit test. --- bundle/deploy/terraform/init.go | 7 +++++++ bundle/deploy/terraform/init_test.go | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/bundle/deploy/terraform/init.go b/bundle/deploy/terraform/init.go index e7f720d0..7d75ee8a 100644 --- a/bundle/deploy/terraform/init.go +++ b/bundle/deploy/terraform/init.go @@ -111,6 +111,13 @@ func inheritEnvVars(ctx context.Context, environ map[string]string) error { environ["PATH"] = path } + // Include $AZURE_CONFIG_FILE in set of environment variables to pass along. + // This is set in Azure DevOps by the AzureCLI@2 task. + azureConfigFile, ok := env.Lookup(ctx, "AZURE_CONFIG_FILE") + if ok { + environ["AZURE_CONFIG_FILE"] = azureConfigFile + } + // Include $TF_CLI_CONFIG_FILE to override terraform provider in development. 
// See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration
 devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE")

diff --git a/bundle/deploy/terraform/init_test.go b/bundle/deploy/terraform/init_test.go
index 94e47dbc..450e7eb6 100644
--- a/bundle/deploy/terraform/init_test.go
+++ b/bundle/deploy/terraform/init_test.go
@@ -269,19 +269,20 @@ func TestSetUserAgentExtraEnvVar(t *testing.T) {
 }

 func TestInheritEnvVars(t *testing.T) {
- env := map[string]string{}
-
 t.Setenv("HOME", "/home/testuser")
 t.Setenv("PATH", "/foo:/bar")
 t.Setenv("TF_CLI_CONFIG_FILE", "/tmp/config.tfrc")
+ t.Setenv("AZURE_CONFIG_FILE", "/tmp/foo/bar")

- err := inheritEnvVars(context.Background(), env)
-
- require.NoError(t, err)
-
- require.Equal(t, env["HOME"], "/home/testuser")
- require.Equal(t, env["PATH"], "/foo:/bar")
- require.Equal(t, env["TF_CLI_CONFIG_FILE"], "/tmp/config.tfrc")
+ ctx := context.Background()
+ env := map[string]string{}
+ err := inheritEnvVars(ctx, env)
+ if assert.NoError(t, err) {
+ assert.Equal(t, "/home/testuser", env["HOME"])
+ assert.Equal(t, "/foo:/bar", env["PATH"])
+ assert.Equal(t, "/tmp/config.tfrc", env["TF_CLI_CONFIG_FILE"])
+ assert.Equal(t, "/tmp/foo/bar", env["AZURE_CONFIG_FILE"])
+ }
 }

 func TestSetUserProfileFromInheritEnvVars(t *testing.T) {

From 2e000f1ebd0f22933bb2fd64ee7a9f44cb53b5d8 Mon Sep 17 00:00:00 2001
From: "Lennart Kats (databricks)"
Date: Thu, 29 Aug 2024 21:07:21 +0200
Subject: [PATCH 22/37] Use materialized views in the default-sql template
 (#1709)

## Changes
Materialized views now support `CREATE OR REPLACE` ([docs](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html))! This makes it possible to use them with Workflows in DABs. This PR updates the template to use a materialized view rather than a regular view.

## Tests
Manually validated in production.
---
 .../template/{{.project_name}}/src/orders_daily.sql.tmpl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl
index 8a9d12ea..e5ceb77a 100644
--- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl
+++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl
@@ -1,10 +1,9 @@
 -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml)
-{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}}

 USE CATALOG {{"{{"}}catalog{{"}}"}};
 USE IDENTIFIER({{"{{"}}schema{{"}}"}});

-CREATE OR REPLACE VIEW
+CREATE OR REPLACE MATERIALIZED VIEW
 orders_daily
 AS SELECT
 order_date, count(*) AS number_of_orders

From ed4a4585c07602e84a8ce97336688463d9833527 Mon Sep 17 00:00:00 2001
From: "Lennart Kats (databricks)"
Date: Fri, 30 Aug 2024 09:32:10 +0200
Subject: [PATCH 23/37] Update templates to latest LTS DBR (#1715)

## Changes
This updates the templates to use the latest DBR 15 LTS version.
- [x] DB Connect 15.4 must be released + validated before this can go out --- libs/template/templates/dbt-sql/library/versions.tmpl | 4 ++-- libs/template/templates/default-python/library/versions.tmpl | 4 ++-- libs/template/templates/default-sql/library/versions.tmpl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/template/templates/dbt-sql/library/versions.tmpl b/libs/template/templates/dbt-sql/library/versions.tmpl index f9a879d2..7d0c88e7 100644 --- a/libs/template/templates/dbt-sql/library/versions.tmpl +++ b/libs/template/templates/dbt-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl index f9a879d2..7d0c88e7 100644 --- a/libs/template/templates/default-python/library/versions.tmpl +++ b/libs/template/templates/default-python/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} diff --git a/libs/template/templates/default-sql/library/versions.tmpl b/libs/template/templates/default-sql/library/versions.tmpl index f9a879d2..7d0c88e7 100644 --- a/libs/template/templates/default-sql/library/versions.tmpl +++ b/libs/template/templates/default-sql/library/versions.tmpl @@ -1,7 +1,7 @@ {{define "latest_lts_dbr_version" -}} - 13.3.x-scala2.12 + 15.4.x-scala2.12 {{- end}} {{define "latest_lts_db_connect_version_spec" -}} - >=13.3,<13.4 + >=15.4,<15.5 {{- end}} From 70ce80251853ae6443ea280ba6b2bc4614c7267a Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 30 Aug 2024 15:29:00 +0200 Subject: [PATCH 24/37] PythonMutator: preserve normalize diagnostics (#1735) ## Changes Preserve diagnostics if there are any errors or warnings when PythonMutator normalizes output. If anything goes wrong during conversion, diagnostics contain the relevant location and path. 
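In essence, instead of flattening the first normalization warning into a generic error, the output loading path now escalates warnings to errors and passes the full diagnostics through. A condensed sketch of the `strictNormalize` helper introduced in the diff below:

```go
normalized, diags := convert.Normalize(dst, generated)

// Warnings shouldn't happen because the output should already be normalized;
// when one does, it indicates a bug in the mutator, so escalate it to an
// error while keeping its location and path instead of discarding it.
strictDiags := diag.Diagnostics{}
for _, d := range diags {
	if d.Severity == diag.Warning {
		d.Severity = diag.Error
	}
	strictDiags = strictDiags.Append(d)
}
```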
## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 35 +++++++++++-------- .../mutator/python/python_mutator_test.go | 20 ++++++++++- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 4f44df0a..bf30f070 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -205,10 +205,8 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, err := loadOutputFile(rootPath, outputPath) - if err != nil { - return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err) - } + output, outputDiags := loadOutputFile(rootPath, outputPath) + pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings return output, pythonDiagnostics @@ -225,10 +223,10 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { +func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) } defer outputFile.Close() @@ -243,27 +241,34 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { // for that, we pass virtualPath instead of outputPath as file location virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to get absolute path: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } generated, err := yamlloader.LoadYAML(virtualPath, outputFile) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err) + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - normalized, diagnostic := convert.Normalize(config.Root{}, generated) - if diagnostic.Error() != nil { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error()) - } + return strictNormalize(config.Root{}, generated) +} + +func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { + normalized, diags := convert.Normalize(dst, generated) // warnings shouldn't happen because output should be already normalized // when it happens, it's a bug in the mutator, and should be treated as an error - for _, d := range diagnostic.Filter(diag.Warning) { - return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary) + strictDiags := diag.Diagnostics{} + + for _, d := range diags { + if d.Severity == diag.Warning { + d.Severity = diag.Error + } + + strictDiags = strictDiags.Append(d) } - return normalized, nil + return normalized, strictDiags } // loadDiagnosticsFile loads diagnostics from a file. 
diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index ea02d1ce..26350d88 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -10,6 +10,8 @@ import ( "runtime" "testing" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/dyn/merge" "github.com/databricks/cli/bundle/env" @@ -255,7 +257,7 @@ func TestPythonMutator_badOutput(t *testing.T) { mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) - assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property") + assert.EqualError(t, diag.Error(), "unknown field: unknown_property") } func TestPythonMutator_disabled(t *testing.T) { @@ -546,6 +548,22 @@ func TestInterpreterPath(t *testing.T) { } } +func TestStrictNormalize(t *testing.T) { + // NB: there is no way to trigger diag.Error, so we don't test it + + type TestStruct struct { + A int `json:"a"` + } + + value := dyn.NewValue(map[string]dyn.Value{"A": dyn.NewValue("abc", nil)}, nil) + + _, diags := convert.Normalize(TestStruct{}, value) + _, strictDiags := strictNormalize(TestStruct{}, value) + + assert.False(t, diags.HasError()) + assert.True(t, strictDiags.HasError()) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 5d9910c8e0f3119276b6ee4afd534ac82d4fb6c2 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:09:08 +0530 Subject: [PATCH 25/37] Make lock optional in the JSON schema (#1738) Fixes https://github.com/databricks/cli/issues/1561 --- bundle/config/deployment.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/deployment.go b/bundle/config/deployment.go index 7f0f57a8..b7efb445 100644 --- a/bundle/config/deployment.go +++ b/bundle/config/deployment.go @@ -6,5 +6,5 @@ type Deployment struct { FailOnActiveRuns bool `json:"fail_on_active_runs,omitempty"` // Lock configures locking behavior on deployment. 
- Lock Lock `json:"lock"` + Lock Lock `json:"lock,omitempty"` } From 582558cac22017c2265b384c5380179975dc6f3d Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 2 Sep 2024 11:17:18 +0200 Subject: [PATCH 26/37] Do not suppress normalisation diagnostics for resolving variables (#1740) ## Changes Tested on the following bundle configuration ``` bundle: name: clusters mode: development variables: webhook_notifications: description: Webhook URL for notifications type: complex default: on_failure: id: 6a6c04c1-389c-4534-95af-b68b62a9dbe6 resources: jobs: test_job: name: "Andrew Nester Test Job" tasks: - task_key: test_task notebook_task: notebook_path: "./src/test.py" new_cluster: num_workers: 2 node_type_id: "i3.xlarge" autoscale: min_workers: 2 max_workers: 7 spark_version: "12.2.x-scala2.12" spark_conf: "spark.executor.memory": "2g" webhook_notifications: ${var.webhook_notifications} ``` bundle validate output is below ``` andrew.nester@HFW9Y94129 wheel % databricks bundle validate Warning: expected sequence, found map at resources.jobs.test_job.webhook_notifications.on_failure in bundle.yml:11:9 Name: clusters Target: default Workspace: User: andrew.nester@databricks.com Path: /Users/andrew.nester@databricks.com/.bundle/clusters/default ``` **Note** that error correctly points to the variable --- .../config/mutator/resolve_variable_references.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 61940be5..5e5b7610 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -10,7 +10,6 @@ import ( "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn/convert" "github.com/databricks/cli/libs/dyn/dynvar" - "github.com/databricks/cli/libs/log" ) type resolveVariableReferences struct { @@ -124,6 +123,7 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // We rewrite it here to make the resolution logic simpler. varPath := dyn.NewPath(dyn.Key("var")) + var diags diag.Diagnostics err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { // Synthesize a copy of the root that has all fields that are present in the type // but not set in the dynamic value set to their corresponding empty value. @@ -180,14 +180,13 @@ func (m *resolveVariableReferences) Apply(ctx context.Context, b *bundle.Bundle) // Normalize the result because variable resolution may have been applied to non-string fields. // For example, a variable reference may have been resolved to a integer. - root, diags := convert.Normalize(b.Config, root) - for _, diag := range diags { - // This occurs when a variable's resolved value is incompatible with the field's type. - // Log a warning until we have a better way to surface these diagnostics to the user. 
- log.Warnf(ctx, "normalization diagnostic: %s", diag.Summary) - } + root, normaliseDiags := convert.Normalize(b.Config, root) + diags = diags.Extend(normaliseDiags) return root, nil }) - return diag.FromErr(err) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + return diags } From ed448815b4e9881494d3ef561ec84ef694ea4b11 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Mon, 2 Sep 2024 11:49:30 +0200 Subject: [PATCH 27/37] PythonMutator: explain missing package error (#1736) ## Changes Explain the error when the `databricks-pydabs` package is not installed or the Python environment isn't correctly activated. Example output: ``` Error: python mutator process failed: ".venv/bin/python3 -m databricks.bundles.build --phase load --input .../input.json --output .../output.json --diagnostics .../diagnostics.json: exit status 1", use --debug to enable logging .../.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') Explanation: 'databricks-pydabs' library is not installed in the Python environment. If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, and that the wheel is installed in the Python environment: $ .venv/bin/pip install -e . If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: pydabs: venv_path: .venv ``` ## Tests Unit tests --- .../config/mutator/python/python_mutator.go | 49 +++++++++++++++++-- .../mutator/python/python_mutator_test.go | 24 +++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index bf30f070..fbf3b7e0 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -1,15 +1,21 @@ package python import ( + "bytes" "context" "encoding/json" "errors" "fmt" + "io" "os" "path/filepath" - "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go/logger" + "github.com/fatih/color" + + "strings" + + "github.com/databricks/cli/libs/python" "github.com/databricks/cli/bundle/env" @@ -169,7 +175,11 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } - stderrWriter := newLogWriter(ctx, "stderr: ") + stderrBuf := bytes.Buffer{} + stderrWriter := io.MultiWriter( + newLogWriter(ctx, "stderr: "), + &stderrBuf, + ) stdoutWriter := newLogWriter(ctx, "stdout: ") _, processErr := process.Background( @@ -197,7 +207,13 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r // process can fail without reporting errors in diagnostics file or creating it, for instance, // venv doesn't have PyDABs library installed if processErr != nil { - return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr) + diagnostic := diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("python mutator process failed: %q, use --debug to enable logging", processErr), + Detail: explainProcessErr(stderrBuf.String()), + } + + return dyn.InvalidValue, diag.Diagnostics{diagnostic} } // or we can fail to read diagnostics file, that should always be created @@ -212,6 +228,33 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return output, 
pythonDiagnostics } +const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + +// explainProcessErr provides additional explanation for common errors. +// It's meant to be the best effort, and not all errors are covered. +// Output should be used only used for error reporting. +func explainProcessErr(stderr string) string { + // implemented in cpython/Lib/runpy.py and portable across Python 3.x, including pypy + if strings.Contains(stderr, "Error while finding module specification for 'databricks.bundles.build'") { + summary := color.CyanString("Explanation: ") + "'databricks-pydabs' library is not installed in the Python environment.\n" + + return stderr + "\n" + summary + "\n" + installExplanation + } + + return stderr +} + func writeInputFile(inputPath string, input dyn.Value) error { // we need to marshal dyn.Value instead of bundle.Config to JSON to support // non-string fields assigned with bundle variables diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 26350d88..bf12b249 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -564,6 +564,30 @@ func TestStrictNormalize(t *testing.T) { assert.True(t, strictDiags.HasError()) } +func TestExplainProcessErr(t *testing.T) { + stderr := "/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')\n" + expected := `/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks') + +Explanation: 'databricks-pydabs' library is not installed in the Python environment. + +If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +and that the wheel is installed in the Python environment: + + $ .venv/bin/pip install -e . + +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +or activate the environment before running CLI commands: + + experimental: + pydabs: + venv_path: .venv +` + + out := explainProcessErr(stderr) + + assert.Equal(t, expected, out) +} + func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) From 0ce7be8ff07dcf1f57d41b620b209f73e20df118 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:39:16 +0200 Subject: [PATCH 28/37] Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 (#1741) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/Masterminds/semver/v3](https://github.com/Masterminds/semver) from 3.2.1 to 3.3.0.
Release notes (sourced from github.com/Masterminds/semver/v3's releases and changelog):

v3.3.0 (2024-08-27)

Changed

  • #241: Simplify StrictNewVersion parsing (thanks @grosser)
  • Testing support up through Go 1.23
  • Minimum version set to 1.21 as this is what's tested now
  • Fuzz testing now supports caching

Full changelog: https://github.com/Masterminds/semver/compare/v3.2.1...v3.3.0
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 4aa27992..9777106c 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/databricks/cli go 1.22 require ( - github.com/Masterminds/semver/v3 v3.2.1 // MIT + github.com/Masterminds/semver/v3 v3.3.0 // MIT github.com/briandowns/spinner v1.23.1 // Apache 2.0 github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0 github.com/fatih/color v1.17.0 // MIT diff --git a/go.sum b/go.sum index 2e58948a..b232e8e4 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ cloud.google.com/go/compute/metadata v0.3.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1h dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= -github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= +github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0= +github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= From 096123674afb6c593f2c242c3210fa5735ed98cb Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 2 Sep 2024 19:13:17 +0530 Subject: [PATCH 29/37] Fix streaming of stdout, stdin, stderr in cobra test runner (#1742) ## Changes We were not using the readers and writers set in the test fixtures in the progress logger. This PR fixes that. It also modifies `TestAccAbortBind`, which was implicitly relying on the bug. I encountered this bug while working on https://github.com/databricks/cli/pull/1672. ## Tests Manually. From non-tty: ``` Error: failed to bind the resource, err: This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed. ``` From tty, bind works as expected. ``` Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'. [y/n]: y Updating deployment state... Successfully bound databricks_pipeline with an id '9d2dedbb-f522-4503-96ba-4bc4d5bfa77d'. 
Run 'bundle deploy' to deploy changes to your workspace ``` --- bundle/deploy/terraform/import.go | 5 +++++ cmd/root/progress_logger.go | 6 ++++++ internal/bundle/bind_resource_test.go | 8 ++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bundle/deploy/terraform/import.go b/bundle/deploy/terraform/import.go index 7c1a6815..dfe60a58 100644 --- a/bundle/deploy/terraform/import.go +++ b/bundle/deploy/terraform/import.go @@ -69,6 +69,11 @@ func (m *importResource) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn // Remove output starting from Warning until end of output output = output[:bytes.Index([]byte(output), []byte("Warning:"))] cmdio.LogString(ctx, output) + + if !cmdio.IsPromptSupported(ctx) { + return diag.Errorf("This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed.") + } + ans, err := cmdio.AskYesOrNo(ctx, "Confirm import changes? Changes will be remotely applied only after running 'bundle deploy'.") if err != nil { return diag.FromErr(err) diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index c05ecb04..7d6a1fa4 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -29,6 +29,12 @@ func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) } func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { + // No need to initialize the logger if it's already set in the context. This + // happens in unit tests where the logger is setup as a fixture. + if _, ok := cmdio.FromContext(ctx); ok { + return ctx, nil + } + if f.log.level.String() != "disabled" && f.log.file.String() == "stderr" && f.ProgressLogFormat == flags.ModeInplace { return nil, fmt.Errorf("inplace progress logging cannot be used when log-file is stderr") diff --git a/internal/bundle/bind_resource_test.go b/internal/bundle/bind_resource_test.go index d44ad2c3..2449c31f 100644 --- a/internal/bundle/bind_resource_test.go +++ b/internal/bundle/bind_resource_test.go @@ -11,6 +11,7 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -101,12 +102,15 @@ func TestAccAbortBind(t *testing.T) { destroyBundle(t, ctx, bundleRoot) }) + // Bind should fail because prompting is not possible. t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") c := internal.NewCobraTestRunner(t, "bundle", "deployment", "bind", "foo", fmt.Sprint(jobId)) - // Simulate user aborting the bind. This is done by not providing any input to the prompt in non-interactive mode. + // Expect error suggesting to use --auto-approve _, _, err = c.Run() - require.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "failed to bind the resource") + assert.ErrorContains(t, err, "This bind operation requires user confirmation, but the current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") err = deployBundle(t, ctx, bundleRoot) require.NoError(t, err) From 072fa812e28c32d374afdb5409e7320f1c5894c7 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 3 Sep 2024 09:51:54 +0200 Subject: [PATCH 30/37] Include a permissions section in all templates (#1713) ## Changes This updates the templates to include a `permissions` section. 
Having a permissions section is a best practice, is helpful to understand the notion of permissions, and helps diagnose permission errors (https://github.com/databricks/cli/pull/1386). This is a cherry-pick from https://github.com/databricks/cli/pull/1387. This change was verified to work both in dev and prod. Existing unit tests validate the validity of the templates in these modes. --- .../{{.project_name}}/databricks.yml.tmpl | 17 ++++---- .../{{.project_name}}/databricks.yml.tmpl | 36 ++++------------- .../{{.project_name}}/databricks.yml.tmpl | 39 ++++++------------- 3 files changed, 28 insertions(+), 64 deletions(-) diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index fdda03c0..f96ce4fe 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -12,8 +12,10 @@ include: targets: dev: default: true - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default. + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development workspace: host: {{workspace_host}} @@ -22,11 +24,10 @@ targets: mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} - {{- if not is_service_principal}} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - # This runs as {{user_name}} in production. We could also use a service principal here - # using service_principal_name (see the Databricks documentation). - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index e3572326..8544dc83 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -7,44 +7,24 @@ include: - resources/*.yml targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy: + # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default - # - The 'development' mode is used for Delta Live Tables pipelines + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: host: {{workspace_host}} - ## Optionally, there could be a 'staging' target here. 
- ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. - {{- /* - Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production` - with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template - to explain that customers should update if they see this. - */}} - # If this path results in an error, please make sure you have a recent version of the CLI installed. + # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy. root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE run_as: - {{- if is_service_principal}} - service_principal_name: {{user_name}} - {{- else}} - # This runs as {{user_name}} in production. We could also use a service principal here, - # see https://docs.databricks.com/dev-tools/bundles/permissions.html. - user_name: {{user_name}} - {{- end}} + {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index a47fb7c1..55c1aae4 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -18,16 +18,16 @@ variables: {{- $dev_schema := .shared_schema }} {{- $prod_schema := .shared_schema }} {{- if (regexp "^yes").MatchString .personal_schemas}} -{{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- $prod_schema = "default"}} + {{- $dev_schema = "${workspace.current_user.short_name}"}} + {{- $prod_schema = "default"}} {{- end}} -# Deployment targets. targets: - # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to indicate this is a personal development copy. - # Any job schedules and triggers are paused by default + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true workspace: @@ -37,35 +37,18 @@ targets: catalog: {{.default_catalog}} schema: {{$dev_schema}} - ## Optionally, there could be a 'staging' target here. - ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.) - # - # staging: - # workspace: - # host: {{workspace_host}} - - # The 'prod' target, used for production deployment. prod: - # We use 'mode: production' to indicate this is a production deployment. - # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} - # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy. 
- {{- /* Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production`
- with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template
- to explain that customers should update if they see this. */}}
-    # If this path results in an error, please make sure you have a recent version of the CLI installed.
+    # We explicitly specify /Users/{{user_name}} to make sure we only have a single copy.
     root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
     variables:
       warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}}
       catalog: {{.default_catalog}}
       schema: {{$prod_schema}}
-  {{- if not is_service_principal}}
+  permissions:
+    - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
+      level: CAN_MANAGE
   run_as:
-    # This runs as {{user_name}} in production. We could also use a service principal here
-    # using service_principal_name (see https://docs.databricks.com/en/dev-tools/bundles/permissions.html).
-    user_name: {{user_name}}
-  {{end -}}
+    {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}

From 35cdf4010dad156902d8d8b20b5fc0fe01de6acc Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 12:44:21 +0530
Subject: [PATCH 31/37] Do not error if we cannot prompt for a profile in
 `auth login` (#1745)

## Changes
With https://github.com/databricks/cli/pull/1370 we started to error if a profile name was not provided in a non-tty setting. The Databricks VSCode extension, however, uses the `auth login` command to simply refresh the tokens. Thus, this PR is a regression fix for that use case.

## Tests
Manually, `databricks auth login --host https://e2-dogfood.staging.cloud.databricks.com` no longer errors. Instead it successfully refreshes the credentials.
---
 cmd/auth/login.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/auth/login.go b/cmd/auth/login.go
index f87a2a02..79b79546 100644
--- a/cmd/auth/login.go
+++ b/cmd/auth/login.go
@@ -19,7 +19,7 @@ import (

 func promptForProfile(ctx context.Context, defaultValue string) (string, error) {
 	if !cmdio.IsInTTY(ctx) {
-		return "", fmt.Errorf("the command is being run in a non-interactive environment, please specify a profile using --profile")
+		return "", nil
 	}

 	prompt := cmdio.Prompt(ctx)

From a27c24a397f91048677c38e48ee31213f278f640 Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Wed, 4 Sep 2024 16:41:47 +0530
Subject: [PATCH 32/37] Add prompt when a pipeline recreation happens (#1672)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Changes
DLT pipeline recreations are destructive. They can lead to lost history of previous updates, temporary outage of the tables, and are potentially computationally expensive. Thus we make a breaking change where a prompt is shown to the user if their configuration changes will lead to a DLT recreation.

Users can skip the prompt by specifying the `--auto-approve` flag.

This PR also fixes an issue with our test runner where logs from the cmdio.Logger would not get propagated to the reader returned by our cobra test runner.

## Tests
Manually, and new unit and integration tests.

```
➜  bundle-playground-3 cli bundle deploy
Uploading bundle files to /Users/63ec021d-b0c6-49c0-93a0-5123953a1cb2/.bundle/test/development/files...
The following DLT pipelines will be recreated.
Underlying tables will be unavailable for a transient period until the newly recreated pipelines are run once successfully. History of previous pipeline update runs will be lost because of recreation: recreate pipeline foo Would you like to proceed? [y/n]: n Deployment cancelled! ``` --- bundle/phases/deploy.go | 96 +++++++++++++------ bundle/phases/deploy_test.go | 67 +++++++++++++ .../databricks_template_schema.json | 8 ++ .../template/databricks.yml.tmpl | 25 +++++ .../bundles/recreate_pipeline/template/nb.sql | 2 + internal/bundle/deploy_test.go | 91 +++++++++++++++++- 6 files changed, 260 insertions(+), 29 deletions(-) create mode 100644 bundle/phases/deploy_test.go create mode 100644 internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json create mode 100644 internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl create mode 100644 internal/bundle/bundles/recreate_pipeline/template/nb.sql diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index ca967c32..49544227 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -19,9 +19,38 @@ import ( "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/libs/cmdio" terraformlib "github.com/databricks/cli/libs/terraform" + tfjson "github.com/hashicorp/terraform-json" ) -func approvalForUcSchemaDelete(ctx context.Context, b *bundle.Bundle) (bool, error) { +func parseTerraformActions(changes []*tfjson.ResourceChange, toInclude func(typ string, actions tfjson.Actions) bool) []terraformlib.Action { + res := make([]terraformlib.Action, 0) + for _, rc := range changes { + if !toInclude(rc.Type, rc.Change.Actions) { + continue + } + + var actionType terraformlib.ActionType + switch { + case rc.Change.Actions.Delete(): + actionType = terraformlib.ActionTypeDelete + case rc.Change.Actions.Replace(): + actionType = terraformlib.ActionTypeRecreate + default: + // No use case for other action types yet. + continue + } + + res = append(res, terraformlib.Action{ + Action: actionType, + ResourceType: rc.Type, + ResourceName: rc.Name, + }) + } + + return res +} + +func approvalForDeploy(ctx context.Context, b *bundle.Bundle) (bool, error) { tf := b.Terraform if tf == nil { return false, fmt.Errorf("terraform not initialized") @@ -33,41 +62,52 @@ func approvalForUcSchemaDelete(ctx context.Context, b *bundle.Bundle) (bool, err return false, err } - actions := make([]terraformlib.Action, 0) - for _, rc := range plan.ResourceChanges { - // We only care about destructive actions on UC schema resources. - if rc.Type != "databricks_schema" { - continue + schemaActions := parseTerraformActions(plan.ResourceChanges, func(typ string, actions tfjson.Actions) bool { + // Filter in only UC schema resources. + if typ != "databricks_schema" { + return false } - var actionType terraformlib.ActionType + // We only display prompts for destructive actions like deleting or + // recreating a schema. + return actions.Delete() || actions.Replace() + }) - switch { - case rc.Change.Actions.Delete(): - actionType = terraformlib.ActionTypeDelete - case rc.Change.Actions.Replace(): - actionType = terraformlib.ActionTypeRecreate - default: - // We don't need a prompt for non-destructive actions like creating - // or updating a schema. - continue + dltActions := parseTerraformActions(plan.ResourceChanges, func(typ string, actions tfjson.Actions) bool { + // Filter in only DLT pipeline resources. 
+ if typ != "databricks_pipeline" { + return false } - actions = append(actions, terraformlib.Action{ - Action: actionType, - ResourceType: rc.Type, - ResourceName: rc.Name, - }) - } + // Recreating DLT pipeline leads to metadata loss and for a transient period + // the underling tables will be unavailable. + return actions.Replace() || actions.Delete() + }) - // No restricted actions planned. No need for approval. - if len(actions) == 0 { + // We don't need to display any prompts in this case. + if len(dltActions) == 0 && len(schemaActions) == 0 { return true, nil } - cmdio.LogString(ctx, "The following UC schemas will be deleted or recreated. Any underlying data may be lost:") - for _, action := range actions { - cmdio.Log(ctx, action) + // One or more UC schema resources will be deleted or recreated. + if len(schemaActions) != 0 { + cmdio.LogString(ctx, "The following UC schemas will be deleted or recreated. Any underlying data may be lost:") + for _, action := range schemaActions { + cmdio.Log(ctx, action) + } + } + + // One or more DLT pipelines is being recreated. + if len(dltActions) != 0 { + msg := ` +This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed:` + cmdio.LogString(ctx, msg) + for _, action := range dltActions { + cmdio.Log(ctx, action) + } } if b.AutoApprove { @@ -126,7 +166,7 @@ func Deploy() bundle.Mutator { terraform.CheckRunningResource(), terraform.Plan(terraform.PlanGoal("deploy")), bundle.If( - approvalForUcSchemaDelete, + approvalForDeploy, deployCore, bundle.LogString("Deployment cancelled!"), ), diff --git a/bundle/phases/deploy_test.go b/bundle/phases/deploy_test.go new file mode 100644 index 00000000..e00370b3 --- /dev/null +++ b/bundle/phases/deploy_test.go @@ -0,0 +1,67 @@ +package phases + +import ( + "testing" + + terraformlib "github.com/databricks/cli/libs/terraform" + tfjson "github.com/hashicorp/terraform-json" + "github.com/stretchr/testify/assert" +) + +func TestParseTerraformActions(t *testing.T) { + changes := []*tfjson.ResourceChange{ + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionCreate}, + }, + Name: "create pipeline", + }, + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete}, + }, + Name: "delete pipeline", + }, + { + Type: "databricks_pipeline", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete, tfjson.ActionCreate}, + }, + Name: "recreate pipeline", + }, + { + Type: "databricks_whatever", + Change: &tfjson.Change{ + Actions: tfjson.Actions{tfjson.ActionDelete, tfjson.ActionCreate}, + }, + Name: "recreate whatever", + }, + } + + res := parseTerraformActions(changes, func(typ string, actions tfjson.Actions) bool { + if typ != "databricks_pipeline" { + return false + } + + if actions.Delete() || actions.Replace() { + return true + } + + return false + }) + + assert.Equal(t, []terraformlib.Action{ + { + Action: terraformlib.ActionTypeDelete, + ResourceType: "databricks_pipeline", + ResourceName: "delete pipeline", + }, + { + Action: terraformlib.ActionTypeRecreate, + ResourceType: "databricks_pipeline", + ResourceName: "recreate pipeline", + }, + }, res) +} diff --git 
a/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json b/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json new file mode 100644 index 00000000..762f4470 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/databricks_template_schema.json @@ -0,0 +1,8 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for the schema and pipeline names" + } + } +} diff --git a/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl b/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl new file mode 100644 index 00000000..10350f13 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/template/databricks.yml.tmpl @@ -0,0 +1,25 @@ +bundle: + name: "bundle-playground" + +variables: + catalog: + description: The catalog the DLT pipeline should use. + default: main + + +resources: + pipelines: + foo: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: ${var.catalog} + +include: + - "*.yml" + +targets: + development: + default: true diff --git a/internal/bundle/bundles/recreate_pipeline/template/nb.sql b/internal/bundle/bundles/recreate_pipeline/template/nb.sql new file mode 100644 index 00000000..199ff507 --- /dev/null +++ b/internal/bundle/bundles/recreate_pipeline/template/nb.sql @@ -0,0 +1,2 @@ +-- Databricks notebook source +select 1 diff --git a/internal/bundle/deploy_test.go b/internal/bundle/deploy_test.go index 269b7c80..736c880d 100644 --- a/internal/bundle/deploy_test.go +++ b/internal/bundle/deploy_test.go @@ -120,8 +120,97 @@ func TestAccBundleDeployUcSchemaFailsWithoutAutoApprove(t *testing.T) { t.Setenv("BUNDLE_ROOT", bundleRoot) t.Setenv("TERM", "dumb") c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") - stdout, _, err := c.Run() + stdout, stderr, err := c.Run() + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), "The following UC schemas will be deleted or recreated. Any underlying data may be lost:\n delete schema bar") + assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") +} + +func TestAccBundlePipelineDeleteWithoutAutoApprove(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + w := wt.W + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + bundleRoot, err := initTestTemplate(t, ctx, "deploy_then_remove_resources", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + // deploy pipeline + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + // assert pipeline is created + pipelineName := "test-bundle-pipeline-" + uniqueId + pipeline, err := w.Pipelines.GetByName(ctx, pipelineName) + require.NoError(t, err) + assert.Equal(t, pipeline.Name, pipelineName) + + // assert job is created + jobName := "test-bundle-job-" + uniqueId + job, err := w.Jobs.GetBySettingsName(ctx, jobName) + require.NoError(t, err) + assert.Equal(t, job.Settings.Name, jobName) + + // delete resources.yml + err = os.Remove(filepath.Join(bundleRoot, "resources.yml")) + require.NoError(t, err) + + // Redeploy the bundle. Expect it to fail because deleting the pipeline requires --auto-approve. 
+ t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock") + stdout, stderr, err := c.Run() + + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), `This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed: + delete pipeline bar`) + assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") + +} + +func TestAccBundlePipelineRecreateWithoutAutoApprove(t *testing.T) { + ctx, wt := acc.UcWorkspaceTest(t) + w := wt.W + uniqueId := uuid.New().String() + + bundleRoot, err := initTestTemplate(t, ctx, "recreate_pipeline", map[string]any{ + "unique_id": uniqueId, + }) + require.NoError(t, err) + + err = deployBundle(t, ctx, bundleRoot) + require.NoError(t, err) + + t.Cleanup(func() { + destroyBundle(t, ctx, bundleRoot) + }) + + // Assert the pipeline is created + pipelineName := "test-pipeline-" + uniqueId + pipeline, err := w.Pipelines.GetByName(ctx, pipelineName) + require.NoError(t, err) + require.Equal(t, pipelineName, pipeline.Name) + + // Redeploy the bundle, pointing the DLT pipeline to a different UC catalog. + t.Setenv("BUNDLE_ROOT", bundleRoot) + t.Setenv("TERM", "dumb") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "bundle", "deploy", "--force-lock", "--var=\"catalog=whatever\"") + stdout, stderr, err := c.Run() + + assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) + assert.Contains(t, stderr.String(), `This action will result in the deletion or recreation of the following DLT Pipelines along with the +Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will +restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline +properties such as the 'catalog' or 'storage' are changed: + recreate pipeline foo`) assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") } From ca6332a5a4325aff1be848536f45d13bd74d93b3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 4 Sep 2024 13:24:55 +0200 Subject: [PATCH 33/37] Fixed complex variables are not being correctly merged from include files (#1746) ## Changes Fixes an `Error: no value assigned to required variable .` when the main complex variable definition is defined in one file but target override is defined in separate file which is included in the main one. 
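For example, a layout like the following previously failed, because the shorthand override is rewritten before the included files are merged into the main configuration, so the variable's `type: complex` is not visible yet. This is a trimmed version of the regression test fixture added below:

```
# databricks.yml
variables:
  cluster:
    type: complex

include:
  - ./variables/*.yml

# variables/clusters.yml
targets:
  dev:
    variables:
      cluster:
        spark_version: "14.2.x-scala2.11"
        node_type_id: "Standard_DS3_v2"
```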
## Tests
Added regression test
---
 bundle/config/root.go                         | 12 +++++++++++-
 bundle/tests/complex_variables_test.go        | 19 +++++++++++++++++++
 .../complex_multiple_files/databricks.yml     | 17 +++++++++++++++++
 .../variables/clusters.yml                    | 11 +++++++++++
 4 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 bundle/tests/variables/complex_multiple_files/databricks.yml
 create mode 100644 bundle/tests/variables/complex_multiple_files/variables/clusters.yml

diff --git a/bundle/config/root.go b/bundle/config/root.go
index 86dc3392..281c5c2a 100644
--- a/bundle/config/root.go
+++ b/bundle/config/root.go
@@ -433,10 +433,20 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
 		}, variable.Locations()), nil

 	case dyn.KindMap, dyn.KindSequence:
+		lookup, err := dyn.Get(variable, "lookup")
+		// If lookup is set, we don't rewrite the variable; return it as is.
+		if err == nil && lookup.Kind() != dyn.KindInvalid {
+			return variable, nil
+		}
+
+		// Check if the original definition of the variable has a type field.
+		// Type might not be found if the variable is overridden in a separate file
+		// and the configuration is not merged yet.
 		typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type")))
 		if err != nil {
-			return variable, nil
+			return dyn.NewValue(map[string]dyn.Value{
+				"default": variable,
+			}, variable.Locations()), nil
 		}

 		if typeV.MustString() == "complex" {
diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go
index 1badea6d..d46d8d8c 100644
--- a/bundle/tests/complex_variables_test.go
+++ b/bundle/tests/complex_variables_test.go
@@ -68,3 +68,22 @@ func TestComplexVariablesOverride(t *testing.T) {
 	require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.random"])
 	require.Equal(t, "", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.PolicyId)
 }
+
+func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) {
+	b, diags := loadTargetWithDiags("variables/complex_multiple_files", "dev")
+	require.Empty(t, diags)
+
+	diags = bundle.Apply(context.Background(), b, bundle.Seq(
+		mutator.SetVariables(),
+		mutator.ResolveVariableReferencesInComplexVariables(),
+		mutator.ResolveVariableReferences(
+			"variables",
+		),
+	))
+	require.NoError(t, diags.Error())
+
+	require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion)
+	require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId)
+	require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers)
+	require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"])
+}
diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml
new file mode 100644
index 00000000..cb5d2439
--- /dev/null
+++ b/bundle/tests/variables/complex_multiple_files/databricks.yml
@@ -0,0 +1,17 @@
+bundle:
+  name: complex-variables-multiple-files
+
+resources:
+  jobs:
+    my_job:
+      job_clusters:
+        - job_cluster_key: key
+          new_cluster: ${var.cluster}
+
+variables:
+  cluster:
+    type: complex
+    description: "A cluster definition"
+
+include:
+  - ./variables/*.yml
diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
new file mode 100644
index 00000000..badd4516
--- /dev/null
+++ b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
@@ -0,0 +1,11 @@
+targets:
+  default:
+  dev:
+    variables:
+      cluster:
+        spark_version: "14.2.x-scala2.11"
+        node_type_id: "Standard_DS3_v2"
+        num_workers: 4
+        spark_conf:
+          spark.speculation: false
+          spark.databricks.delta.retentionDurationCheck.enabled: false

From 72030844c52f9499b10e7158544a80fde56451f3 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Wed, 4 Sep 2024 19:16:40 +0200
Subject: [PATCH 34/37] Fixed variable override in target with full variable
 syntax (#1749)

## Changes
This PR makes sure that both of these override syntaxes for variables work correctly:
```
targets:
  dev:
    variables:
      cluster1:
        spark_version: "14.2.x-scala2.11"
        node_type_id: "Standard_DS3_v2"
        num_workers: 4
        spark_conf:
          spark.speculation: false
          spark.databricks.delta.retentionDurationCheck.enabled: false
      cluster2:
        default:
          spark_version: "14.2.x-scala2.11"
          node_type_id: "Standard_DS3_v2"
          num_workers: 4
          spark_conf:
            spark.speculation: false
            spark.databricks.delta.retentionDurationCheck.enabled: false
```

## Tests
Added regression test

---------

Co-authored-by: Pieter Noordhuis
---
 bundle/config/root.go                         | 43 ++++++++++++++-----
 bundle/tests/complex_variables_test.go        | 11 ++---
 .../complex_multiple_files/databricks.yml     | 43 +++++++++++++++++--
 .../variables/clusters.yml                    | 10 ++++-
 4 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/bundle/config/root.go b/bundle/config/root.go
index 281c5c2a..46578769 100644
--- a/bundle/config/root.go
+++ b/bundle/config/root.go
@@ -406,6 +406,30 @@ func (r *Root) MergeTargetOverrides(name string) error {
 	return r.updateWithDynamicValue(root)
 }

+var variableKeywords = []string{"default", "lookup"}
+
+// isFullVariableOverrideDef checks if the given value is a full syntax variable override.
+// A full syntax variable override is a map with only one of the following
+// keys: "default", "lookup".
+func isFullVariableOverrideDef(v dyn.Value) bool {
+	mv, ok := v.AsMap()
+	if !ok {
+		return false
+	}
+
+	if mv.Len() != 1 {
+		return false
+	}
+
+	for _, keyword := range variableKeywords {
+		if _, ok := mv.GetByString(keyword); ok {
+			return true
+		}
+	}
+
+	return false
+}
+
 // rewriteShorthands performs lightweight rewriting of the configuration
 // tree where we allow users to write a shorthand and must rewrite to the full form.
 func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
@@ -433,30 +457,27 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
 		}, variable.Locations()), nil

 	case dyn.KindMap, dyn.KindSequence:
-		lookup, err := dyn.Get(variable, "lookup")
-		// If lookup is set, we don't rewrite the variable; return it as is.
-		if err == nil && lookup.Kind() != dyn.KindInvalid {
+		// If it's a full variable definition, leave it as is.
+		if isFullVariableOverrideDef(variable) {
 			return variable, nil
 		}

 		// Check if the original definition of the variable has a type field.
+		// If it has a type field, it means the shorthand is a value of a complex type.
 		// Type might not be found if the variable is overridden in a separate file
 		// and the configuration is not merged yet.
typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type"))) - if err != nil { - return dyn.NewValue(map[string]dyn.Value{ - "default": variable, - }, variable.Locations()), nil - } - - if typeV.MustString() == "complex" { + if err == nil && typeV.MustString() == "complex" { return dyn.NewValue(map[string]dyn.Value{ "type": typeV, "default": variable, }, variable.Locations()), nil } - return variable, nil + // If it's a shorthand, rewrite it to a full variable definition. + return dyn.NewValue(map[string]dyn.Value{ + "default": variable, + }, variable.Locations()), nil default: return variable, nil diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index d46d8d8c..6371071c 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -81,9 +81,10 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { ), )) require.NoError(t, diags.Error()) - - require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) - require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) + for _, cluster := range b.Config.Resources.Jobs["my_job"].JobClusters { + require.Equalf(t, "14.2.x-scala2.11", cluster.NewCluster.SparkVersion, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "Standard_DS3_v2", cluster.NewCluster.NodeTypeId, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, 4, cluster.NewCluster.NumWorkers, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) + } } diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml index cb5d2439..42a82c61 100644 --- a/bundle/tests/variables/complex_multiple_files/databricks.yml +++ b/bundle/tests/variables/complex_multiple_files/databricks.yml @@ -5,13 +5,48 @@ resources: jobs: my_job: job_clusters: - - job_cluster_key: key - new_cluster: ${var.cluster} - + - job_cluster_key: key1 + new_cluster: ${var.cluster1} + - job_cluster_key: key2 + new_cluster: ${var.cluster2} + - job_cluster_key: key3 + new_cluster: ${var.cluster3} + - job_cluster_key: key4 + new_cluster: ${var.cluster4} variables: - cluster: + cluster1: + type: complex + description: "A cluster definition" + cluster2: + type: complex + description: "A cluster definition" + cluster3: + type: complex + description: "A cluster definition" + cluster4: type: complex description: "A cluster definition" include: - ./variables/*.yml + + +targets: + default: + dev: + variables: + cluster3: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false + cluster4: + default: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml index badd4516..0186c437 100644 --- 
a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
+++ b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
@@ -2,10 +2,18 @@ targets:
   default:
   dev:
     variables:
-      cluster:
+      cluster1:
         spark_version: "14.2.x-scala2.11"
         node_type_id: "Standard_DS3_v2"
         num_workers: 4
         spark_conf:
           spark.speculation: false
           spark.databricks.delta.retentionDurationCheck.enabled: false
+      cluster2:
+        default:
+          spark_version: "14.2.x-scala2.11"
+          node_type_id: "Standard_DS3_v2"
+          num_workers: 4
+          spark_conf:
+            spark.speculation: false
+            spark.databricks.delta.retentionDurationCheck.enabled: false

From f71d9e76499f9296daad620f6efed651b9864679 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Thu, 5 Sep 2024 10:56:52 +0200
Subject: [PATCH 35/37] [Release] Release v0.228.0 (#1752)

CLI:
 * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)).

Bundles:

As of this release, the CLI will show a prompt if there are configuration changes that will lead to a DLT pipeline recreation. Users can skip the prompt by specifying the `--auto-approve` flag.

 * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)).
 * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)).
 * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)).
 * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)).
 * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)).
 * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)).
 * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)).
 * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)).
 * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)).

Internal:
 * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)).
 * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)).
 * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)).
 * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)).

Dependency updates:
 * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)).

---------

Co-authored-by: Pieter Noordhuis
---
 CHANGELOG.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fac7d597..d6383125 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Version changelog

+## [Release] Release v0.228.0
+
+CLI:
+ * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)).
+
+Bundles:
+
+As of this release, the CLI will show a prompt if there are configuration changes that lead to DLT pipeline recreation.
+Users can skip the prompt by specifying the `--auto-approve` flag.
+
+ * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)).
+ * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)). + * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)). + * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)). + * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)). + * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)). + * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)). + * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)). + * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)). + +Internal: + * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)). + * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)). + * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)). + * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)). + +Dependency updates: + * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)). + ## [Release] Release v0.227.1 CLI: From ceefa80d7278eeef9cce500b2c69453da2040f40 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 5 Sep 2024 13:05:16 +0200 Subject: [PATCH 36/37] Pass copy of `dyn.Path` to callback function (#1747) ## Changes Some call sites hold on to the `dyn.Path` provided to them by the callback. It must therefore never be mutated after the callback returns, or these mutations leak out into unknown scope. This change means it is no longer possible for this failure mode to happen. ## Tests Unit test. --- bundle/artifacts/expand_globs.go | 7 +--- .../config/validate/unique_resource_keys.go | 6 +--- bundle/libraries/expand_glob_references.go | 8 ++--- bundle/libraries/upload.go | 2 +- libs/dyn/visit.go | 2 +- libs/dyn/visit_map.go | 4 +-- libs/dyn/visit_test.go | 36 +++++++++++++++++++ 7 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 libs/dyn/visit_test.go diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index 61744405..cdf3d459 100644 --- a/bundle/artifacts/expand_globs.go +++ b/bundle/artifacts/expand_globs.go @@ -33,12 +33,7 @@ func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { Severity: diag.Error, Summary: fmt.Sprintf("%s: %s", source, message), Locations: []dyn.Location{v.Location()}, - - Paths: []dyn.Path{ - // Hack to clone the path. This path copy is mutable. - // To be addressed in a later PR. - p.Append(), - }, + Paths: []dyn.Path{p}, } } diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go index d6212b0a..50295375 100644 --- a/bundle/config/validate/unique_resource_keys.go +++ b/bundle/config/validate/unique_resource_keys.go @@ -3,7 +3,6 @@ package validate import ( "context" "fmt" - "slices" "sort" "github.com/databricks/cli/bundle" @@ -66,10 +65,7 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.D } } - // dyn.Path under the hood is a slice. 
The code that walks the configuration - // tree uses the same underlying slice to track the path as it walks - // the tree. So, we need to clone it here. - m.paths = append(m.paths, slices.Clone(p)) + m.paths = append(m.paths, p) m.locations = append(m.locations, v.Locations()...) resourceMetadata[k] = m diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index 9e90a2a1..9322a06b 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -16,12 +16,10 @@ type expand struct { func matchError(p dyn.Path, l []dyn.Location, message string) diag.Diagnostic { return diag.Diagnostic{ - Severity: diag.Error, - Summary: message, - Paths: []dyn.Path{ - p.Append(), - }, + Severity: diag.Error, + Summary: message, Locations: l, + Paths: []dyn.Path{p}, } } diff --git a/bundle/libraries/upload.go b/bundle/libraries/upload.go index be7cc41d..224e7ab2 100644 --- a/bundle/libraries/upload.go +++ b/bundle/libraries/upload.go @@ -76,7 +76,7 @@ func collectLocalLibraries(b *bundle.Bundle) (map[string][]configLocation, error source = filepath.Join(b.RootPath, source) libs[source] = append(libs[source], configLocation{ - configPath: p.Append(), // Hack to get the copy of path + configPath: p, location: v.Location(), }) diff --git a/libs/dyn/visit.go b/libs/dyn/visit.go index 4d3cf501..38adec24 100644 --- a/libs/dyn/visit.go +++ b/libs/dyn/visit.go @@ -70,7 +70,7 @@ type visitOptions struct { func visit(v Value, prefix Path, suffix Pattern, opts visitOptions) (Value, error) { if len(suffix) == 0 { - return opts.fn(prefix, v) + return opts.fn(slices.Clone(prefix), v) } // Initialize prefix if it is empty. diff --git a/libs/dyn/visit_map.go b/libs/dyn/visit_map.go index cd2cd483..3f0cded0 100644 --- a/libs/dyn/visit_map.go +++ b/libs/dyn/visit_map.go @@ -21,7 +21,7 @@ func Foreach(fn MapFunc) MapFunc { for _, pair := range m.Pairs() { pk := pair.Key pv := pair.Value - nv, err := fn(append(p, Key(pk.MustString())), pv) + nv, err := fn(p.Append(Key(pk.MustString())), pv) if err != nil { return InvalidValue, err } @@ -32,7 +32,7 @@ func Foreach(fn MapFunc) MapFunc { s := slices.Clone(v.MustSequence()) for i, value := range s { var err error - s[i], err = fn(append(p, Index(i)), value) + s[i], err = fn(p.Append(Index(i)), value) if err != nil { return InvalidValue, err } diff --git a/libs/dyn/visit_test.go b/libs/dyn/visit_test.go new file mode 100644 index 00000000..5b61399b --- /dev/null +++ b/libs/dyn/visit_test.go @@ -0,0 +1,36 @@ +package dyn_test + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestVisitCallbackPathCopy(t *testing.T) { + vin := dyn.V(map[string]dyn.Value{ + "foo": dyn.V(42), + "bar": dyn.V(43), + }) + + var paths []dyn.Path + + // The callback should receive a copy of the path. + // If the same underlying value is used, all collected paths will be the same. + // This test uses `MapByPattern` to collect all paths in the map. + // Visit itself doesn't have public functions and we exclusively use black-box testing for this package. + _, _ = dyn.MapByPattern(vin, dyn.NewPattern(dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + paths = append(paths, p) + return v, nil + }) + + // Verify that the paths retained their original values. 
+	var strings []string
+	for _, p := range paths {
+		strings = append(strings, p.String())
+	}
+	assert.ElementsMatch(t, strings, []string{
+		"foo",
+		"bar",
+	})
+}

From 02e83877f4284589942bb439e849e01fc81a4314 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Fri, 6 Sep 2024 13:34:57 +0200
Subject: [PATCH 37/37] Added listing cluster filtering for cluster lookups
 (#1754)

## Changes
We added a custom resolver for the cluster to add filtering for the cluster source when we list all clusters. Without the filtering, listing could take a very long time (5-10 mins), which leads to lookup timeouts.

## Tests
Existing unit tests passing

---
 .codegen/lookup.go.tmpl                       |  4 ++
 .../resolve_resource_references_test.go       | 33 +++++++++-----
 bundle/config/variable/lookup.go              | 44 +++++++++++++++++++
 bundle/config/variable/lookup_overrides.go    | 41 +++++++++++++++++
 bundle/tests/variables_test.go                |  9 +++-
 5 files changed, 119 insertions(+), 12 deletions(-)
 create mode 100644 bundle/config/variable/lookup_overrides.go

diff --git a/.codegen/lookup.go.tmpl b/.codegen/lookup.go.tmpl
index 431709f9..124b629d 100644
--- a/.codegen/lookup.go.tmpl
+++ b/.codegen/lookup.go.tmpl
@@ -116,6 +116,10 @@ func allResolvers() *resolvers {
 	{{range .Services -}}
 	{{- if in $allowlist .KebabName -}}
 	r.{{.Singular.PascalName}} = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["{{.Singular.PascalName}}"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.{{.PascalName}}.GetBy{{range .NamedIdMap.NamePath}}{{.PascalName}}{{end}}(ctx, name)
 		if err != nil {
 			return "", err
diff --git a/bundle/config/mutator/resolve_resource_references_test.go b/bundle/config/mutator/resolve_resource_references_test.go
index 86a03b23..ee2f0e2e 100644
--- a/bundle/config/mutator/resolve_resource_references_test.go
+++ b/bundle/config/mutator/resolve_resource_references_test.go
@@ -2,7 +2,6 @@ package mutator

 import (
 	"context"
-	"fmt"
 	"testing"

 	"github.com/databricks/cli/bundle"
@@ -44,11 +43,13 @@ func TestResolveClusterReference(t *testing.T) {
 	m := mocks.NewMockWorkspaceClient(t)
 	b.SetWorkpaceClient(m.WorkspaceClient)
 	clusterApi := m.GetMockClustersAPI()
-	clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef1).Return(&compute.ClusterDetails{
-		ClusterId: "1234-5678-abcd",
-	}, nil)
-	clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef2).Return(&compute.ClusterDetails{
-		ClusterId: "9876-5432-xywz",
+	clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{
+		FilterBy: &compute.ListClustersFilterBy{
+			ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi},
+		},
+	}).Return([]compute.ClusterDetails{
+		{ClusterId: "1234-5678-abcd", ClusterName: clusterRef1},
+		{ClusterId: "9876-5432-xywz", ClusterName: clusterRef2},
 	}, nil)

 	diags := bundle.Apply(context.Background(), b, ResolveResourceReferences())
@@ -78,10 +79,16 @@ func TestResolveNonExistentClusterReference(t *testing.T) {
 	m := mocks.NewMockWorkspaceClient(t)
 	b.SetWorkpaceClient(m.WorkspaceClient)
 	clusterApi := m.GetMockClustersAPI()
-	clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef).Return(nil, fmt.Errorf("ClusterDetails named '%s' does not exist", clusterRef))
+	clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{
+		FilterBy: &compute.ListClustersFilterBy{
+			ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi},
+		},
+	}).Return([]compute.ClusterDetails{
+		{ClusterId: "1234-5678-abcd", 
ClusterName: "some other cluster"}, + }, nil) diags := bundle.Apply(context.Background(), b, ResolveResourceReferences()) - require.ErrorContains(t, diags.Error(), "failed to resolve cluster: Random, err: ClusterDetails named 'Random' does not exist") + require.ErrorContains(t, diags.Error(), "failed to resolve cluster: Random, err: cluster named 'Random' does not exist") } func TestNoLookupIfVariableIsSet(t *testing.T) { @@ -158,8 +165,14 @@ func TestResolveVariableReferencesInVariableLookups(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) clusterApi := m.GetMockClustersAPI() - clusterApi.EXPECT().GetByClusterName(mock.Anything, "cluster-bar-dev").Return(&compute.ClusterDetails{ - ClusterId: "1234-5678-abcd", + + clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "1234-5678-abcd", ClusterName: "cluster-bar-dev"}, + {ClusterId: "9876-5432-xywz", ClusterName: "some other cluster"}, }, nil) diags := bundle.Apply(context.Background(), b, bundle.Seq(ResolveVariableReferencesInLookup(), ResolveResourceReferences())) diff --git a/bundle/config/variable/lookup.go b/bundle/config/variable/lookup.go index 9c85e2a7..e40b0ef7 100755 --- a/bundle/config/variable/lookup.go +++ b/bundle/config/variable/lookup.go @@ -220,6 +220,10 @@ type resolvers struct { func allResolvers() *resolvers { r := &resolvers{} r.Alert = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Alert"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Alerts.GetByDisplayName(ctx, name) if err != nil { return "", err @@ -228,6 +232,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.Id), nil } r.ClusterPolicy = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["ClusterPolicy"] + if ok { + return fn(ctx, w, name) + } entity, err := w.ClusterPolicies.GetByName(ctx, name) if err != nil { return "", err @@ -236,6 +244,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.PolicyId), nil } r.Cluster = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Cluster"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Clusters.GetByClusterName(ctx, name) if err != nil { return "", err @@ -244,6 +256,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.ClusterId), nil } r.Dashboard = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Dashboard"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Dashboards.GetByName(ctx, name) if err != nil { return "", err @@ -252,6 +268,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.Id), nil } r.InstancePool = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["InstancePool"] + if ok { + return fn(ctx, w, name) + } entity, err := w.InstancePools.GetByInstancePoolName(ctx, name) if err != nil { return "", err @@ -260,6 +280,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.InstancePoolId), nil } r.Job = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Job"] + if ok { + return fn(ctx, w, 
name)
+		}
 		entity, err := w.Jobs.GetBySettingsName(ctx, name)
 		if err != nil {
 			return "", err
@@ -268,6 +292,10 @@ func allResolvers() *resolvers {
 		return fmt.Sprint(entity.JobId), nil
 	}
 	r.Metastore = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["Metastore"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.Metastores.GetByName(ctx, name)
 		if err != nil {
 			return "", err
@@ -276,6 +304,10 @@ func allResolvers() *resolvers {
 		return fmt.Sprint(entity.MetastoreId), nil
 	}
 	r.Pipeline = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["Pipeline"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.Pipelines.GetByName(ctx, name)
 		if err != nil {
 			return "", err
@@ -284,6 +316,10 @@ func allResolvers() *resolvers {
 		return fmt.Sprint(entity.PipelineId), nil
 	}
 	r.Query = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["Query"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.Queries.GetByDisplayName(ctx, name)
 		if err != nil {
 			return "", err
@@ -292,6 +328,10 @@ func allResolvers() *resolvers {
 		return fmt.Sprint(entity.Id), nil
 	}
 	r.ServicePrincipal = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["ServicePrincipal"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.ServicePrincipals.GetByDisplayName(ctx, name)
 		if err != nil {
 			return "", err
@@ -300,6 +340,10 @@ func allResolvers() *resolvers {
 		return fmt.Sprint(entity.ApplicationId), nil
 	}
 	r.Warehouse = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) {
+		fn, ok := lookupOverrides["Warehouse"]
+		if ok {
+			return fn(ctx, w, name)
+		}
 		entity, err := w.Warehouses.GetByName(ctx, name)
 		if err != nil {
 			return "", err
diff --git a/bundle/config/variable/lookup_overrides.go b/bundle/config/variable/lookup_overrides.go
new file mode 100644
index 00000000..1be373dc
--- /dev/null
+++ b/bundle/config/variable/lookup_overrides.go
@@ -0,0 +1,41 @@
+package variable
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/service/compute"
+)
+
+var lookupOverrides = map[string]resolverFunc{
+	"Cluster": resolveCluster,
+}
+
+// We added a custom resolver for the cluster to add filtering for the cluster source when we list all clusters.
+// Without the filtering, listing could take a very long time (5-10 mins), which leads to lookup timeouts.
+func resolveCluster(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + result, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }) + + if err != nil { + return "", err + } + + tmp := map[string][]compute.ClusterDetails{} + for _, v := range result { + key := v.ClusterName + tmp[key] = append(tmp[key], v) + } + alternatives, ok := tmp[name] + if !ok || len(alternatives) == 0 { + return "", fmt.Errorf("cluster named '%s' does not exist", name) + } + if len(alternatives) > 1 { + return "", fmt.Errorf("there are %d instances of clusters named '%s'", len(alternatives), name) + } + return alternatives[0].ClusterId, nil +} diff --git a/bundle/tests/variables_test.go b/bundle/tests/variables_test.go index 51a23e5d..9451c5a0 100644 --- a/bundle/tests/variables_test.go +++ b/bundle/tests/variables_test.go @@ -124,8 +124,13 @@ func TestVariablesWithTargetLookupOverrides(t *testing.T) { }, nil) clustersApi := mockWorkspaceClient.GetMockClustersAPI() - clustersApi.EXPECT().GetByClusterName(mock.Anything, "some-test-cluster").Return(&compute.ClusterDetails{ - ClusterId: "4321", + clustersApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "4321", ClusterName: "some-test-cluster"}, + {ClusterId: "9876", ClusterName: "some-other-cluster"}, }, nil) clusterPoliciesApi := mockWorkspaceClient.GetMockClusterPoliciesAPI()