From 6771ba09a699b3890316cf8f849b3a51733750e4 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 20 Aug 2024 11:33:03 +0200 Subject: [PATCH 01/11] Correctly mark package names with versions as remote libraries (#1697) ## Changes Fixes https://github.com/databricks/setup-cli/issues/124 ## Tests Added regression test --- bundle/libraries/local_path.go | 5 +++++ bundle/libraries/local_path_test.go | 1 + 2 files changed, 6 insertions(+) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 5b5ec6c0..3e32adfd 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -66,6 +66,11 @@ func IsLibraryLocal(dep string) bool { } func isPackage(name string) bool { + // If the dependency has ==, it's a package with version + if strings.Contains(name, "==") { + return true + } + // If the dependency has no extension, it's a PyPi package name return path.Ext(name) == "" } diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index be4028d5..7299cdc9 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,6 +54,7 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4==4.12.3", expected: false}, } for i, tc := range testCases { From af5048e73efab56dd2a13a02132e78d3ee84c5e7 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 20 Aug 2024 14:54:56 +0200 Subject: [PATCH 02/11] Share test initializer in common helper function (#1695) ## Changes These tests inadvertently re-ran mutators, the first time through `loadTarget` and the second time by running `phases.Initialize()` themselves. Some of the mutators that are executed in `phases.Initialize()` are also run as part of `loadTarget`. This is overdue a refactor to make it unambiguous what runs when. Until then, this removes the duplicated execution. ## Tests Unit tests pass. 
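For reference, a minimal sketch of how a test reads once it goes through the shared helper (the test name and bundle path below are placeholders, not taken from this change):

```go
func TestSomeTarget(t *testing.T) {
	// initializeTarget loads the bundle, wires up the mocked workspace
	// client, selects the requested target, and runs phases.Initialize()
	// exactly once.
	b, diags := initializeTarget(t, "./some_bundle", "default")
	require.NoError(t, diags.Error())

	// Assertions against b.Config follow here.
	_ = b
}
```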
--- bundle/tests/loader.go | 29 +++++++++++++++ bundle/tests/pipeline_glob_paths_test.go | 37 +------------------ .../tests/relative_path_translation_test.go | 29 +-------------- 3 files changed, 33 insertions(+), 62 deletions(-) diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 069f0935..848132a1 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -8,6 +8,10 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -43,3 +47,28 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { )) return b, diags } + +func configureMock(t *testing.T, b *bundle.Bundle) { + // Configure mock workspace client + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &config.Config{ + Host: "https://mock.databricks.workspace.com", + } + m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ + UserName: "user@domain.com", + }, nil) + b.SetWorkpaceClient(m.WorkspaceClient) +} + +func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diagnostics) { + b := load(t, path) + configureMock(t, b) + + ctx := context.Background() + diags := bundle.Apply(ctx, b, bundle.Seq( + mutator.SelectTarget(env), + phases.Initialize(), + )) + + return b, diags +} diff --git a/bundle/tests/pipeline_glob_paths_test.go b/bundle/tests/pipeline_glob_paths_test.go index bf5039b5..c1c62cfb 100644 --- a/bundle/tests/pipeline_glob_paths_test.go +++ b/bundle/tests/pipeline_glob_paths_test.go @@ -1,33 +1,13 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) func TestExpandPipelineGlobPaths(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "default") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + b, diags := initializeTarget(t, "./pipeline_glob_paths", "default") require.NoError(t, diags.Error()) require.Equal( t, @@ -37,19 +17,6 @@ func TestExpandPipelineGlobPaths(t *testing.T) { } func TestExpandPipelineGlobPathsWithNonExistent(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "error") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + _, diags := initializeTarget(t, "./pipeline_glob_paths", "error") require.ErrorContains(t, 
diags.Error(), "notebook ./non-existent not found") } diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go index d5b80bea..199871d2 100644 --- a/bundle/tests/relative_path_translation_test.go +++ b/bundle/tests/relative_path_translation_test.go @@ -1,36 +1,14 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func configureMock(t *testing.T, b *bundle.Bundle) { - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) -} - func TestRelativePathTranslationDefault(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "default") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "default") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] @@ -40,10 +18,7 @@ func TestRelativePathTranslationDefault(t *testing.T) { } func TestRelativePathTranslationOverride(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "override") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "override") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] From 44902fa3501033928a5ec46dbfcf4cb23f739788 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 20 Aug 2024 15:26:57 +0200 Subject: [PATCH 03/11] Make `pydabs/venv_path` optional (#1687) ## Changes Make `pydabs/venv_path` optional. When not specified, CLI detects the Python interpreter using `python.DetectExecutable`, the same way as for `artifacts`. `python.DetectExecutable` works correctly if a virtual environment is activated or `python3` is available on PATH through other means. Extract the venv detection code from PyDABs into `libs/python/detect`. This code will be used when we implement the `python/venv_path` section in `databricks.yml`. 
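As a rough summary of the new resolution order (the wrapper function name here is illustrative; both called functions are part of this change):

```go
// Prefer an explicitly configured virtual environment; otherwise fall back
// to whatever python3 is found on PATH via python.DetectExecutable.
func resolveInterpreter(ctx context.Context, venvPath string) (string, error) {
	if venvPath != "" {
		return python.DetectVEnvExecutable(venvPath)
	}
	return python.DetectExecutable(ctx)
}
```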
## Tests Unit tests and manually --------- Co-authored-by: Pieter Noordhuis --- bundle/artifacts/whl/infer.go | 2 + bundle/config/experimental.go | 4 +- .../config/mutator/python/python_mutator.go | 33 ++++++------- .../mutator/python/python_mutator_test.go | 21 +++++++-- libs/python/detect.go | 46 +++++++++++++++++++ libs/python/detect_test.go | 46 +++++++++++++++++++ 6 files changed, 128 insertions(+), 24 deletions(-) create mode 100644 libs/python/detect_test.go diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go index dd4ad295..cb727de0 100644 --- a/bundle/artifacts/whl/infer.go +++ b/bundle/artifacts/whl/infer.go @@ -15,6 +15,8 @@ type infer struct { func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { artifact := b.Config.Artifacts[m.name] + + // TODO use python.DetectVEnvExecutable once bundle has a way to specify venv path py, err := python.DetectExecutable(ctx) if err != nil { return diag.FromErr(err) diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 66e97582..061bbdae 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -36,8 +36,8 @@ type PyDABs struct { // VEnvPath is path to the virtual environment. // - // Required if PyDABs is enabled. PyDABs will load the code in the specified - // environment. + // If enabled, PyDABs will execute code within this environment. If disabled, + // it defaults to using the Python interpreter available in the current shell. VEnvPath string `json:"venv_path,omitempty"` // Import contains a list Python packages with PyDABs code. diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index f9febe5b..4f44df0a 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,8 +7,8 @@ import ( "fmt" "os" "path/filepath" - "runtime" + "github.com/databricks/cli/libs/python" "github.com/databricks/databricks-sdk-go/logger" "github.com/databricks/cli/bundle/env" @@ -86,23 +86,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return nil } - if experimental.PyDABs.VEnvPath == "" { - return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set") - } - // mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics' var mutateDiags diag.Diagnostics var mutateDiagsHasError = errors.New("unexpected error") err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { - pythonPath := interpreterPath(experimental.PyDABs.VEnvPath) + pythonPath, err := detectExecutable(ctx, experimental.PyDABs.VEnvPath) - if _, err := os.Stat(pythonPath); err != nil { - if os.IsNotExist(err) { - return dyn.InvalidValue, fmt.Errorf("can't find %q, check if venv is created", pythonPath) - } else { - return dyn.InvalidValue, fmt.Errorf("can't find %q: %w", pythonPath, err) - } + if err != nil { + return dyn.InvalidValue, fmt.Errorf("failed to get Python interpreter path: %w", err) } cacheDir, err := createCacheDir(ctx) @@ -423,11 +415,16 @@ func isOmitemptyDelete(left dyn.Value) bool { } } -// interpreterPath returns platform-specific path to Python interpreter in the virtual environment. 
-func interpreterPath(venvPath string) string { - if runtime.GOOS == "windows" { - return filepath.Join(venvPath, "Scripts", "python3.exe") - } else { - return filepath.Join(venvPath, "bin", "python3") +// detectExecutable lookups Python interpreter in virtual environment, or if not set, in PATH. +func detectExecutable(ctx context.Context, venvPath string) (string, error) { + if venvPath == "" { + interpreter, err := python.DetectExecutable(ctx) + if err != nil { + return "", err + } + + return interpreter, nil } + + return python.DetectVEnvExecutable(venvPath) } diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index fbe835f9..ea02d1ce 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -282,7 +282,7 @@ func TestPythonMutator_venvRequired(t *testing.T) { } func TestPythonMutator_venvNotFound(t *testing.T) { - expectedError := fmt.Sprintf("can't find %q, check if venv is created", interpreterPath("bad_path")) + expectedError := fmt.Sprintf("failed to get Python interpreter path: can't find %q, check if virtualenv is created", interpreterPath("bad_path")) b := loadYaml("databricks.yml", ` experimental: @@ -596,9 +596,7 @@ func loadYaml(name string, content string) *bundle.Bundle { } } -func withFakeVEnv(t *testing.T, path string) { - interpreterPath := interpreterPath(path) - +func withFakeVEnv(t *testing.T, venvPath string) { cwd, err := os.Getwd() if err != nil { panic(err) @@ -608,6 +606,8 @@ func withFakeVEnv(t *testing.T, path string) { panic(err) } + interpreterPath := interpreterPath(venvPath) + err = os.MkdirAll(filepath.Dir(interpreterPath), 0755) if err != nil { panic(err) @@ -618,9 +618,22 @@ func withFakeVEnv(t *testing.T, path string) { panic(err) } + err = os.WriteFile(filepath.Join(venvPath, "pyvenv.cfg"), []byte(""), 0755) + if err != nil { + panic(err) + } + t.Cleanup(func() { if err := os.Chdir(cwd); err != nil { panic(err) } }) } + +func interpreterPath(venvPath string) string { + if runtime.GOOS == "windows" { + return filepath.Join(venvPath, "Scripts", "python3.exe") + } else { + return filepath.Join(venvPath, "bin", "python3") + } +} diff --git a/libs/python/detect.go b/libs/python/detect.go index b0c1475c..8fcc7cd9 100644 --- a/libs/python/detect.go +++ b/libs/python/detect.go @@ -3,9 +3,23 @@ package python import ( "context" "errors" + "fmt" + "io/fs" + "os" "os/exec" + "path/filepath" + "runtime" ) +// DetectExecutable looks up the path to the python3 executable from the PATH +// environment variable. +// +// If virtualenv is activated, executable from the virtualenv is returned, +// because activating virtualenv adds python3 executable on a PATH. +// +// If python3 executable is not found on the PATH, the interpreter with the +// least version that satisfies minimal 3.8 version is returned, e.g. +// python3.10. func DetectExecutable(ctx context.Context) (string, error) { // TODO: add a shortcut if .python-version file is detected somewhere in // the parent directory tree. @@ -32,3 +46,35 @@ func DetectExecutable(ctx context.Context) (string, error) { } return interpreter.Path, nil } + +// DetectVEnvExecutable returns the path to the python3 executable inside venvPath, +// that is not necessarily activated. +// +// If virtualenv is not created, or executable doesn't exist, the error is returned. 
+func DetectVEnvExecutable(venvPath string) (string, error) { + interpreterPath := filepath.Join(venvPath, "bin", "python3") + if runtime.GOOS == "windows" { + interpreterPath = filepath.Join(venvPath, "Scripts", "python3.exe") + } + + if _, err := os.Stat(interpreterPath); err != nil { + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("can't find %q, check if virtualenv is created", interpreterPath) + } else { + return "", fmt.Errorf("can't find %q: %w", interpreterPath, err) + } + } + + // pyvenv.cfg must be always present in correctly configured virtualenv, + // read more in https://snarky.ca/how-virtual-environments-work/ + pyvenvPath := filepath.Join(venvPath, "pyvenv.cfg") + if _, err := os.Stat(pyvenvPath); err != nil { + if errors.Is(err, fs.ErrNotExist) { + return "", fmt.Errorf("expected %q to be virtualenv, but pyvenv.cfg is missing", venvPath) + } else { + return "", fmt.Errorf("can't find %q: %w", pyvenvPath, err) + } + } + + return interpreterPath, nil +} diff --git a/libs/python/detect_test.go b/libs/python/detect_test.go new file mode 100644 index 00000000..78c7067f --- /dev/null +++ b/libs/python/detect_test.go @@ -0,0 +1,46 @@ +package python + +import ( + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDetectVEnvExecutable(t *testing.T) { + dir := t.TempDir() + interpreterPath := interpreterPath(dir) + + err := os.Mkdir(filepath.Dir(interpreterPath), 0755) + require.NoError(t, err) + + err = os.WriteFile(interpreterPath, []byte(""), 0755) + require.NoError(t, err) + + err = os.WriteFile(filepath.Join(dir, "pyvenv.cfg"), []byte(""), 0755) + require.NoError(t, err) + + executable, err := DetectVEnvExecutable(dir) + + assert.NoError(t, err) + assert.Equal(t, interpreterPath, executable) +} + +func TestDetectVEnvExecutable_badLayout(t *testing.T) { + dir := t.TempDir() + + _, err := DetectVEnvExecutable(dir) + + assert.Errorf(t, err, "can't find %q, check if virtualenv is created", interpreterPath(dir)) +} + +func interpreterPath(venvPath string) string { + if runtime.GOOS == "windows" { + return filepath.Join(venvPath, "Scripts", "python3.exe") + } else { + return filepath.Join(venvPath, "bin", "python3") + } +} From a4c1ba3e2827abca29034115436d441310b7ee33 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:15:25 +0530 Subject: [PATCH 04/11] Use API mocks for duplicate path errors in workspace files extensions client (#1690) ## Changes `TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName` recently started failing in our nightlies because the upstream `import` API was changed to [prohibit conflicting file paths](https://docs.databricks.com/en/release-notes/product/2024/august.html#files-can-no-longer-have-identical-names-in-workspace-folders). Because existing conflicting file paths can still be grandfathered in, we need to retain coverage for the test. To do this, this PR: 1. Removes the failing `TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName` 2. Add an equivalent unit test with the `list` and `get-status` API calls mocked. 
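To make the second point concrete, the seam that enables the unit test is a narrow `apiClient` interface (abstracted from `*client.DatabricksClient`, which satisfies it in production) that a testify mock can implement for the `get-status` call; condensed from the diff below, assuming the `context` and `net/http` imports:

```go
// Abstraction over *client.DatabricksClient from the Databricks Go SDK,
// introduced so tests can stub raw calls such as /api/2.0/workspace/get-status.
type apiClient interface {
	Do(ctx context.Context, method, path string,
		headers map[string]string, request, response any,
		visitors ...func(*http.Request) error) error
}
```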
--- internal/filer_test.go | 62 ------- libs/filer/workspace_files_client.go | 26 +-- .../workspace_files_extensions_client.go | 8 +- .../workspace_files_extensions_client_test.go | 151 ++++++++++++++++++ 4 files changed, 172 insertions(+), 75 deletions(-) create mode 100644 libs/filer/workspace_files_extensions_client_test.go diff --git a/internal/filer_test.go b/internal/filer_test.go index 27530425..bc4c9480 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -5,7 +5,6 @@ import ( "context" "encoding/json" "errors" - "fmt" "io" "io/fs" "path" @@ -722,67 +721,6 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { assert.ErrorIs(t, err, fs.ErrNotExist) } -func TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { - t.Parallel() - - tcases := []struct { - files []struct{ name, content string } - name string - }{ - { - name: "python", - files: []struct{ name, content string }{ - {"foo.py", "print('foo')"}, - {"foo.py", "# Databricks notebook source\nprint('foo')"}, - }, - }, - { - name: "r", - files: []struct{ name, content string }{ - {"foo.r", "print('foo')"}, - {"foo.r", "# Databricks notebook source\nprint('foo')"}, - }, - }, - { - name: "sql", - files: []struct{ name, content string }{ - {"foo.sql", "SELECT 'foo'"}, - {"foo.sql", "-- Databricks notebook source\nSELECT 'foo'"}, - }, - }, - { - name: "scala", - files: []struct{ name, content string }{ - {"foo.scala", "println('foo')"}, - {"foo.scala", "// Databricks notebook source\nprintln('foo')"}, - }, - }, - // We don't need to test this for ipynb notebooks. The import API - // fails when the file extension is .ipynb but the content is not a - // valid juptyer notebook. - } - - for i := range tcases { - tc := tcases[i] - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - ctx := context.Background() - wf, tmpDir := setupWsfsExtensionsFiler(t) - - for _, f := range tc.files { - err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories) - require.NoError(t, err) - } - - _, err := wf.ReadDir(ctx, ".") - assert.ErrorAs(t, err, &filer.DuplicatePathError{}) - assert.ErrorContains(t, err, fmt.Sprintf("failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at %s and FILE at %s resolve to the same name %s. Changing the name of one of these objects will resolve this issue", path.Join(tmpDir, "foo"), path.Join(tmpDir, tc.files[0].name), tc.files[0].name)) - }) - } - -} - func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { t.Parallel() diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index e911f440..d8ab5a6b 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -102,13 +102,21 @@ func (info *wsfsFileInfo) MarshalJSON() ([]byte, error) { return marshal.Marshal(info) } +// Interface for *client.DatabricksClient from the Databricks Go SDK. Abstracted +// as an interface to allow for mocking in tests. +type apiClient interface { + Do(ctx context.Context, method, path string, + headers map[string]string, request, response any, + visitors ...func(*http.Request) error) error +} + // WorkspaceFilesClient implements the files-in-workspace API. // NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled. // It can access any workspace path if files-in-workspace is enabled. 
-type WorkspaceFilesClient struct { +type workspaceFilesClient struct { workspaceClient *databricks.WorkspaceClient - apiClient *client.DatabricksClient + apiClient apiClient // File operations will be relative to this path. root WorkspaceRootPath @@ -120,7 +128,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, return nil, err } - return &WorkspaceFilesClient{ + return &workspaceFilesClient{ workspaceClient: w, apiClient: apiClient, @@ -128,7 +136,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, }, nil } -func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { +func (w *workspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { absPath, err := w.root.Join(name) if err != nil { return err @@ -198,7 +206,7 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io return err } -func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { +func (w *workspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -222,7 +230,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCl return w.workspaceClient.Workspace.Download(ctx, absPath) } -func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { +func (w *workspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { absPath, err := w.root.Join(name) if err != nil { return err @@ -266,7 +274,7 @@ func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ... return err } -func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { +func (w *workspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -299,7 +307,7 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.D return wsfsDirEntriesFromObjectInfos(objects), nil } -func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { +func (w *workspaceFilesClient) Mkdir(ctx context.Context, name string) error { dirPath, err := w.root.Join(name) if err != nil { return err @@ -309,7 +317,7 @@ func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { }) } -func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { +func (w *workspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index 844e736b..b24ecf7e 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -133,14 +133,14 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con }, nil } -type DuplicatePathError struct { +type duplicatePathError struct { oi1 workspace.ObjectInfo oi2 workspace.ObjectInfo commonName string } -func (e DuplicatePathError) Error() string { +func (e duplicatePathError) Error() string { return fmt.Sprintf("failed to read files from the workspace file system. Duplicate paths encountered. Both %s at %s and %s at %s resolve to the same name %s. 
Changing the name of one of these objects will resolve this issue", e.oi1.ObjectType, e.oi1.Path, e.oi2.ObjectType, e.oi2.Path, e.commonName) } @@ -157,7 +157,7 @@ func (e ReadOnlyError) Error() string { // delete, and stat notebooks (and files in general) in the workspace, using their paths // with the extension included. // -// The ReadDir method returns a DuplicatePathError if this traditional file system view is +// The ReadDir method returns a duplicatePathError if this traditional file system view is // not possible. For example, a Python notebook called foo and a Python file called `foo.py` // would resolve to the same path `foo.py` in a tradition file system. // @@ -220,7 +220,7 @@ func (w *workspaceFilesExtensionsClient) ReadDir(ctx context.Context, name strin // Error if we have seen this path before in the current directory. // If not seen before, add it to the seen paths. if _, ok := seenPaths[entries[i].Name()]; ok { - return nil, DuplicatePathError{ + return nil, duplicatePathError{ oi1: seenPaths[entries[i].Name()], oi2: sysInfo, commonName: path.Join(name, entries[i].Name()), diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go new file mode 100644 index 00000000..321c4371 --- /dev/null +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -0,0 +1,151 @@ +package filer + +import ( + "context" + "net/http" + "testing" + + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +// Mocks client.DatabricksClient from the databricks-sdk-go package. +type mockApiClient struct { + mock.Mock +} + +func (m *mockApiClient) Do(ctx context.Context, method, path string, + headers map[string]string, request any, response any, + visitors ...func(*http.Request) error) error { + args := m.Called(ctx, method, path, headers, request, response, visitors) + + // Set the http response from a value provided in the mock call. + p := response.(*wsfsFileInfo) + *p = args.Get(1).(wsfsFileInfo) + return args.Error(0) +} + +func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { + for _, tc := range []struct { + name string + language workspace.Language + notebookExportFormat workspace.ExportFormat + notebookPath string + filePath string + expectedError string + }{ + { + name: "python source notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "scala source notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. 
Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r source notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.r", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql source notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + } { + t.Run(tc.name, func(t *testing.T) { + mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t) + mockedApiClient := mockApiClient{} + + // Mock the workspace API's ListAll method. + workspaceApi := mockedWorkspaceClient.GetMockWorkspaceAPI() + workspaceApi.EXPECT().ListAll(mock.Anything, workspace.ListWorkspaceRequest{ + Path: "/dir", + }).Return([]workspace.ObjectInfo{ + { + Path: tc.filePath, + Language: tc.language, + ObjectType: workspace.ObjectTypeFile, + }, + { + Path: tc.notebookPath, + Language: tc.language, + ObjectType: workspace.ObjectTypeNotebook, + }, + }, nil) + + // Mock bespoke API calls to /api/2.0/workspace/get-status, that are + // used to figure out the right file extension for the notebook. + statNotebook := wsfsFileInfo{ + ObjectInfo: workspace.ObjectInfo{ + Path: tc.notebookPath, + Language: tc.language, + ObjectType: workspace.ObjectTypeNotebook, + }, + ReposExportFormat: tc.notebookExportFormat, + } + + mockedApiClient.On("Do", mock.Anything, http.MethodGet, "/api/2.0/workspace/get-status", map[string]string(nil), map[string]string{ + "path": tc.notebookPath, + "return_export_info": "true", + }, mock.AnythingOfType("*filer.wsfsFileInfo"), []func(*http.Request) error(nil)).Return(nil, statNotebook) + + workspaceFilesClient := workspaceFilesClient{ + workspaceClient: mockedWorkspaceClient.WorkspaceClient, + apiClient: &mockedApiClient, + root: NewWorkspaceRootPath("/dir"), + } + + workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{ + workspaceClient: mockedWorkspaceClient.WorkspaceClient, + wsfs: &workspaceFilesClient, + } + + _, err := workspaceFilesExtensionsClient.ReadDir(context.Background(), "/") + + if tc.expectedError == "" { + assert.NoError(t, err) + } else { + assert.ErrorAs(t, err, &duplicatePathError{}) + assert.EqualError(t, err, tc.expectedError) + } + + // assert the mocked methods were actually called, as a sanity check. 
+ workspaceApi.AssertNumberOfCalls(t, "ListAll", 1) + mockedApiClient.AssertNumberOfCalls(t, "Do", 1) + }) + } +} From c775d251eda6fa567de95e55e4558d5b99abce39 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 10:22:35 +0200 Subject: [PATCH 05/11] Improves detection of PyPI package names in environment dependencies (#1699) ## Changes Improves detection of PyPi package names in environment dependencies ## Tests Added unit tests --- bundle/libraries/local_path.go | 22 ++++++++++++++++++---- bundle/libraries/local_path_test.go | 9 +++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 3e32adfd..417bce10 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -3,6 +3,7 @@ package libraries import ( "net/url" "path" + "regexp" "strings" ) @@ -65,14 +66,27 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } +// ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). +// \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) + func isPackage(name string) bool { - // If the dependency has ==, it's a package with version - if strings.Contains(name, "==") { + if packageRegex.MatchString(name) { return true } - // If the dependency has no extension, it's a PyPi package name - return path.Ext(name) == "" + return isUrlBasedLookup(name) +} + +func isUrlBasedLookup(name string) bool { + parts := strings.Split(name, " @ ") + if len(parts) != 2 { + return false + } + + return packageRegex.MatchString(parts[0]) && isRemoteStorageScheme(parts[1]) } func isRemoteStorageScheme(path string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 7299cdc9..7f84b324 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,7 +54,16 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4", expected: false}, {path: "beautifulsoup4==4.12.3", expected: false}, + {path: "beautifulsoup4 >= 4.12.3", expected: false}, + {path: "beautifulsoup4 < 4.12.3", expected: false}, + {path: "beautifulsoup4 ~= 4.12.3", expected: false}, + {path: "beautifulsoup4[security, tests]", expected: false}, + {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, } for i, tc := range testCases { From 192f33bb13a156bebf9d7d2c2b06092d8ae9775d Mon Sep 17 00:00:00 2001 From: Witold Czaplewski Date: Wed, 21 Aug 2024 12:03:56 +0200 Subject: [PATCH 06/11] [DAB] Add support for requirements libraries in Job Tasks (#1543) ## Changes While experimenting 
with DAB I discovered that requirements libraries are being ignored. One thing worth mentioning is that `bundle validate` runs successfully, but `bundle deploy` fails. This PR only covers the second part. ## Tests Added a unit test --- bundle/config/mutator/translate_paths_jobs.go | 5 +++++ bundle/config/mutator/translate_paths_test.go | 9 +++++++++ bundle/libraries/helpers.go | 3 +++ bundle/libraries/helpers_test.go | 1 + 4 files changed, 18 insertions(+) diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index 6febf4f8..e34eeb2f 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -50,6 +50,11 @@ func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern t.translateNoOp, noSkipRewrite, }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), + t.translateFilePath, + noSkipRewrite, + }, } } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 780a540d..fd64593b 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -110,6 +110,7 @@ func TestTranslatePaths(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py")) touchEmptyFile(t, filepath.Join(dir, "my_python_file.py")) touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar")) + touchEmptyFile(t, filepath.Join(dir, "requirements.txt")) b := &bundle.Bundle{ RootPath: dir, @@ -140,6 +141,9 @@ func TestTranslatePaths(t *testing.T) { NotebookTask: &jobs.NotebookTask{ NotebookPath: "./my_job_notebook.py", }, + Libraries: []compute.Library{ + {Requirements: "./requirements.txt"}, + }, }, { PythonWheelTask: &jobs.PythonWheelTask{ @@ -232,6 +236,11 @@ func TestTranslatePaths(t *testing.T) { "/bundle/my_job_notebook", b.Config.Resources.Jobs["job"].Tasks[2].NotebookTask.NotebookPath, ) + assert.Equal( + t, + "/bundle/requirements.txt", + b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements, + ) assert.Equal( t, "/bundle/my_python_file.py", diff --git a/bundle/libraries/helpers.go b/bundle/libraries/helpers.go index 89679c91..b7e707cc 100644 --- a/bundle/libraries/helpers.go +++ b/bundle/libraries/helpers.go @@ -12,5 +12,8 @@ func libraryPath(library *compute.Library) string { if library.Egg != "" { return library.Egg } + if library.Requirements != "" { + return library.Requirements + } return "" } diff --git a/bundle/libraries/helpers_test.go b/bundle/libraries/helpers_test.go index adc20a24..e4bd3277 100644 --- a/bundle/libraries/helpers_test.go +++ b/bundle/libraries/helpers_test.go @@ -13,5 +13,6 @@ func TestLibraryPath(t *testing.T) { assert.Equal(t, path, libraryPath(&compute.Library{Whl: path})) assert.Equal(t, path, libraryPath(&compute.Library{Jar: path})) assert.Equal(t, path, libraryPath(&compute.Library{Egg: path})) + assert.Equal(t, path, libraryPath(&compute.Library{Requirements: path})) assert.Equal(t, "", libraryPath(&compute.Library{})) } From f5df211320a5fad876c58737d959a0a034040c63 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:23:54 +0530 Subject: [PATCH 07/11] Fix prefix preset used for UC schemas (#1704) ## Changes In https://github.com/databricks/cli/pull/1490 we regressed and started using the development mode prefix for UC schemas regardless of the mode of the bundle target. 
This PR fixes the regression and adds a regression test ## Tests Failing integration tests pass now. --- bundle/config/mutator/apply_presets.go | 3 +- bundle/config/mutator/apply_presets_test.go | 57 +++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 42e6ab95..28d015c1 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -155,8 +155,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // Schemas: Prefix for i := range r.Schemas { - prefix = "dev_" + b.Config.Workspace.CurrentUser.ShortName + "_" - r.Schemas[i].Name = prefix + r.Schemas[i].Name + r.Schemas[i].Name = normalizePrefix(prefix) + r.Schemas[i].Name // HTTP API for schemas doesn't yet support tags. It's only supported in // the Databricks UI and via the SQL API. } diff --git a/bundle/config/mutator/apply_presets_test.go b/bundle/config/mutator/apply_presets_test.go index 35dac1f7..ab2478ae 100644 --- a/bundle/config/mutator/apply_presets_test.go +++ b/bundle/config/mutator/apply_presets_test.go @@ -8,6 +8,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" ) @@ -68,6 +69,62 @@ func TestApplyPresetsPrefix(t *testing.T) { } } +func TestApplyPresetsPrefixForUcSchema(t *testing.T) { + tests := []struct { + name string + prefix string + schema *resources.Schema + want string + }{ + { + name: "add prefix to schema", + prefix: "[prefix]", + schema: &resources.Schema{ + CreateSchema: &catalog.CreateSchema{ + Name: "schema1", + }, + }, + want: "prefix_schema1", + }, + { + name: "add empty prefix to schema", + prefix: "", + schema: &resources.Schema{ + CreateSchema: &catalog.CreateSchema{ + Name: "schema1", + }, + }, + want: "schema1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Schemas: map[string]*resources.Schema{ + "schema1": tt.schema, + }, + }, + Presets: config.Presets{ + NamePrefix: tt.prefix, + }, + }, + } + + ctx := context.Background() + diag := bundle.Apply(ctx, b, mutator.ApplyPresets()) + + if diag.HasError() { + t.Fatalf("unexpected error: %v", diag) + } + + require.Equal(t, tt.want, b.Config.Resources.Schemas["schema1"].Name) + }) + } +} + func TestApplyPresetsTags(t *testing.T) { tests := []struct { name string From 6f345293b1e5f4febcd702da8a362b15b606ebd9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 17:05:49 +0200 Subject: [PATCH 08/11] Added filtering flags for cluster list commands (#1703) ## Changes Fixes #1701 ## Tests ``` Usage: databricks clusters list [flags] Flags: --cluster-sources []string Filter clusters by source --cluster-states []string Filter clusters by states -h, --help help for list --is-pinned Filter clusters by pinned status --page-size int Use this field to specify the maximum number of results to be returned by the server. --page-token string Use next_page_token or prev_page_token returned from the previous request to list the next or previous page of clusters respectively. 
--policy-id string Filter clusters by policy id ``` --- cmd/workspace/clusters/overrides.go | 68 ++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/cmd/workspace/clusters/overrides.go b/cmd/workspace/clusters/overrides.go index 55976d40..6038978a 100644 --- a/cmd/workspace/clusters/overrides.go +++ b/cmd/workspace/clusters/overrides.go @@ -1,17 +1,83 @@ package clusters import ( + "strings" + "github.com/databricks/cli/libs/cmdio" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/spf13/cobra" ) -func listOverride(listCmd *cobra.Command, _ *compute.ListClustersRequest) { +// Below we add overrides for filter flags for cluster list command to allow for custom filtering +// Auto generating such flags is not yet supported by the CLI generator +func listOverride(listCmd *cobra.Command, listReq *compute.ListClustersRequest) { listCmd.Annotations["headerTemplate"] = cmdio.Heredoc(` {{header "ID"}} {{header "Name"}} {{header "State"}}`) listCmd.Annotations["template"] = cmdio.Heredoc(` {{range .}}{{.ClusterId | green}} {{.ClusterName | cyan}} {{if eq .State "RUNNING"}}{{green "%s" .State}}{{else if eq .State "TERMINATED"}}{{red "%s" .State}}{{else}}{{blue "%s" .State}}{{end}} {{end}}`) + + listReq.FilterBy = &compute.ListClustersFilterBy{} + listCmd.Flags().BoolVar(&listReq.FilterBy.IsPinned, "is-pinned", false, "Filter clusters by pinned status") + listCmd.Flags().StringVar(&listReq.FilterBy.PolicyId, "policy-id", "", "Filter clusters by policy id") + + sources := &clusterSources{source: &listReq.FilterBy.ClusterSources} + listCmd.Flags().Var(sources, "cluster-sources", "Filter clusters by source") + + states := &clusterStates{state: &listReq.FilterBy.ClusterStates} + listCmd.Flags().Var(states, "cluster-states", "Filter clusters by states") +} + +type clusterSources struct { + source *[]compute.ClusterSource +} + +func (c *clusterSources) String() string { + s := make([]string, len(*c.source)) + for i, source := range *c.source { + s[i] = string(source) + } + + return strings.Join(s, ",") +} + +func (c *clusterSources) Set(value string) error { + splits := strings.Split(value, ",") + for _, split := range splits { + *c.source = append(*c.source, compute.ClusterSource(split)) + } + + return nil +} + +func (c *clusterSources) Type() string { + return "[]string" +} + +type clusterStates struct { + state *[]compute.State +} + +func (c *clusterStates) String() string { + s := make([]string, len(*c.state)) + for i, source := range *c.state { + s[i] = string(source) + } + + return strings.Join(s, ",") +} + +func (c *clusterStates) Set(value string) error { + splits := strings.Split(value, ",") + for _, split := range splits { + *c.state = append(*c.state, compute.State(split)) + } + + return nil +} + +func (c *clusterStates) Type() string { + return "[]string" } func listNodeTypesOverride(listNodeTypesCmd *cobra.Command) { From 6e8cd835a3f699ffec0c04e9301e3a49fd61fc9c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 17:33:25 +0200 Subject: [PATCH 09/11] Add paths field to bundle sync configuration (#1694) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes This field allows a user to configure paths to synchronize to the workspace. Allowed values are relative paths to files and directories anchored at the directory where the field is set. 
If one or more values traverse up the directory tree (to an ancestor of the bundle root directory), the CLI will dynamically determine the root path to use to ensure that the file tree structure remains intact. For example, given a `databricks.yml` in `my_bundle` that includes: ```yaml sync: paths: - ../common - . ``` Then upon synchronization, the workspace will look like: ``` . ├── common │ └── lib.py └── my_bundle ├── databricks.yml └── notebook.py ``` If not set behavior remains identical. ## Tests * Newly added unit tests for the mutators and under `bundle/tests`. * Manually confirmed a bundle without this configuration works the same. * Manually confirmed a bundle with this configuration works. --- bundle/bundle.go | 8 + bundle/bundle_read_only.go | 4 + bundle/config/mutator/configure_wsfs.go | 4 +- bundle/config/mutator/rewrite_sync_paths.go | 4 + .../config/mutator/rewrite_sync_paths_test.go | 16 ++ bundle/config/mutator/sync_default_path.go | 48 +++++ .../config/mutator/sync_default_path_test.go | 82 ++++++++ bundle/config/mutator/sync_infer_root.go | 120 +++++++++++ .../mutator/sync_infer_root_internal_test.go | 72 +++++++ bundle/config/mutator/sync_infer_root_test.go | 198 ++++++++++++++++++ bundle/config/mutator/trampoline.go | 2 +- bundle/config/mutator/trampoline_test.go | 8 +- bundle/config/mutator/translate_paths.go | 12 +- bundle/config/mutator/translate_paths_test.go | 60 +++--- bundle/config/sync.go | 4 + bundle/deploy/files/sync.go | 4 +- bundle/deploy/state_pull.go | 2 +- bundle/deploy/state_pull_test.go | 8 +- bundle/phases/initialize.go | 11 + bundle/python/conditional_transform_test.go | 22 +- bundle/tests/loader.go | 2 + bundle/tests/sync/paths/databricks.yml | 20 ++ .../tests/sync/paths_no_root/databricks.yml | 26 +++ .../sync/shared_code/bundle/databricks.yml | 10 + .../tests/sync/shared_code/common/library.txt | 1 + bundle/tests/sync_test.go | 65 ++++++ cmd/sync/sync_test.go | 6 +- 27 files changed, 760 insertions(+), 59 deletions(-) create mode 100644 bundle/config/mutator/sync_default_path.go create mode 100644 bundle/config/mutator/sync_default_path_test.go create mode 100644 bundle/config/mutator/sync_infer_root.go create mode 100644 bundle/config/mutator/sync_infer_root_internal_test.go create mode 100644 bundle/config/mutator/sync_infer_root_test.go create mode 100644 bundle/tests/sync/paths/databricks.yml create mode 100644 bundle/tests/sync/paths_no_root/databricks.yml create mode 100644 bundle/tests/sync/shared_code/bundle/databricks.yml create mode 100644 bundle/tests/sync/shared_code/common/library.txt diff --git a/bundle/bundle.go b/bundle/bundle.go index 032d98ab..8b5ff976 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -39,6 +39,14 @@ type Bundle struct { // Exclusively use this field for filesystem operations. BundleRoot vfs.Path + // SyncRoot is a virtual filesystem path to the root directory of the files that are synchronized to the workspace. + // It can be an ancestor to [BundleRoot], but not a descendant; that is, [SyncRoot] must contain [BundleRoot]. + SyncRoot vfs.Path + + // SyncRootPath is the local path to the root directory of files that are synchronized to the workspace. + // It is equal to `SyncRoot.Native()` and included as dedicated field for convenient access. + SyncRootPath string + Config config.Root // Metadata about the bundle deployment. 
This is the interface Databricks services diff --git a/bundle/bundle_read_only.go b/bundle/bundle_read_only.go index 59084f2a..74b9d94d 100644 --- a/bundle/bundle_read_only.go +++ b/bundle/bundle_read_only.go @@ -28,6 +28,10 @@ func (r ReadOnlyBundle) BundleRoot() vfs.Path { return r.b.BundleRoot } +func (r ReadOnlyBundle) SyncRoot() vfs.Path { + return r.b.SyncRoot +} + func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient { return r.b.WorkspaceClient() } diff --git a/bundle/config/mutator/configure_wsfs.go b/bundle/config/mutator/configure_wsfs.go index c7b764f0..1d1bec58 100644 --- a/bundle/config/mutator/configure_wsfs.go +++ b/bundle/config/mutator/configure_wsfs.go @@ -24,7 +24,7 @@ func (m *configureWSFS) Name() string { } func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - root := b.BundleRoot.Native() + root := b.SyncRoot.Native() // The bundle root must be located in /Workspace/ if !strings.HasPrefix(root, "/Workspace/") { @@ -45,6 +45,6 @@ func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return diag.FromErr(err) } - b.BundleRoot = p + b.SyncRoot = p return nil } diff --git a/bundle/config/mutator/rewrite_sync_paths.go b/bundle/config/mutator/rewrite_sync_paths.go index cfdc55f3..888714ab 100644 --- a/bundle/config/mutator/rewrite_sync_paths.go +++ b/bundle/config/mutator/rewrite_sync_paths.go @@ -45,6 +45,10 @@ func (m *rewriteSyncPaths) makeRelativeTo(root string) dyn.MapFunc { func (m *rewriteSyncPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "sync", func(_ dyn.Path, v dyn.Value) (nv dyn.Value, err error) { + v, err = dyn.Map(v, "paths", dyn.Foreach(m.makeRelativeTo(b.RootPath))) + if err != nil { + return dyn.InvalidValue, err + } v, err = dyn.Map(v, "include", dyn.Foreach(m.makeRelativeTo(b.RootPath))) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/rewrite_sync_paths_test.go b/bundle/config/mutator/rewrite_sync_paths_test.go index 56ada19e..fa7f124b 100644 --- a/bundle/config/mutator/rewrite_sync_paths_test.go +++ b/bundle/config/mutator/rewrite_sync_paths_test.go @@ -17,6 +17,10 @@ func TestRewriteSyncPathsRelative(t *testing.T) { RootPath: ".", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ -29,6 +33,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "./file.yml") bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml") @@ -37,6 +43,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) @@ -48,6 +56,10 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { RootPath: "/tmp/dir", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ 
-60,6 +72,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml") bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml") @@ -68,6 +82,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) diff --git a/bundle/config/mutator/sync_default_path.go b/bundle/config/mutator/sync_default_path.go new file mode 100644 index 00000000..8e14ce20 --- /dev/null +++ b/bundle/config/mutator/sync_default_path.go @@ -0,0 +1,48 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type syncDefaultPath struct{} + +// SyncDefaultPath configures the default sync path to be equal to the bundle root. +func SyncDefaultPath() bundle.Mutator { + return &syncDefaultPath{} +} + +func (m *syncDefaultPath) Name() string { + return "SyncDefaultPath" +} + +func (m *syncDefaultPath) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + isset := false + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + pv, _ := dyn.Get(v, "sync.paths") + + // If the sync paths field is already set, do nothing. + // We know it is set if its value is either a nil or a sequence (empty or not). + switch pv.Kind() { + case dyn.KindNil, dyn.KindSequence: + isset = true + } + + return v, nil + }) + if err != nil { + return diag.FromErr(err) + } + + // If the sync paths field is already set, do nothing. + if isset { + return nil + } + + // Set the sync paths to the default value. 
+ b.Config.Sync.Paths = []string{"."} + return nil +} diff --git a/bundle/config/mutator/sync_default_path_test.go b/bundle/config/mutator/sync_default_path_test.go new file mode 100644 index 00000000..a37e913d --- /dev/null +++ b/bundle/config/mutator/sync_default_path_test.go @@ -0,0 +1,82 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncDefaultPath_DefaultIfUnset(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) +} + +func TestSyncDefaultPath_SkipIfSet(t *testing.T) { + tcases := []struct { + name string + paths dyn.Value + expect []string + }{ + { + name: "nil", + paths: dyn.V(nil), + expect: nil, + }, + { + name: "empty sequence", + paths: dyn.V([]dyn.Value{}), + expect: []string{}, + }, + { + name: "non-empty sequence", + paths: dyn.V([]dyn.Value{dyn.V("something")}), + expect: []string{"something"}, + }, + } + + for _, tcase := range tcases { + t.Run(tcase.name, func(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + diags := bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, err := dyn.Set(v, "sync", dyn.V(dyn.NewMapping())) + if err != nil { + return dyn.InvalidValue, err + } + v, err = dyn.Set(v, "sync.paths", tcase.paths) + if err != nil { + return dyn.InvalidValue, err + } + return v, nil + }) + return diag.FromErr(err) + }) + require.NoError(t, diags.Error()) + + ctx := context.Background() + diags = bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + + // If the sync paths field is already set, do nothing. + assert.Equal(t, tcase.expect, b.Config.Sync.Paths) + }) + } +} diff --git a/bundle/config/mutator/sync_infer_root.go b/bundle/config/mutator/sync_infer_root.go new file mode 100644 index 00000000..012acf80 --- /dev/null +++ b/bundle/config/mutator/sync_infer_root.go @@ -0,0 +1,120 @@ +package mutator + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" +) + +type syncInferRoot struct{} + +// SyncInferRoot is a mutator that infers the root path of all files to synchronize by looking at the +// paths in the sync configuration. The sync root may be different from the bundle root +// when the user intends to synchronize files outside the bundle root. +// +// The sync root can be equivalent to or an ancestor of the bundle root, but not a descendant. +// That is, the sync root must contain the bundle root. +// +// This mutator requires all sync-related paths and patterns to be relative to the bundle root path. +// This is done by the [RewriteSyncPaths] mutator, which must run before this mutator. 
+func SyncInferRoot() bundle.Mutator { + return &syncInferRoot{} +} + +func (m *syncInferRoot) Name() string { + return "SyncInferRoot" +} + +// computeRoot finds the innermost path that contains the specified path. +// It traverses up the root path until it finds the innermost path. +// If the path does not exist, it returns an empty string. +// +// See "sync_infer_root_internal_test.go" for examples. +func (m *syncInferRoot) computeRoot(path string, root string) string { + for !filepath.IsLocal(path) { + // Break if we have reached the root of the filesystem. + dir := filepath.Dir(root) + if dir == root { + return "" + } + + // Update the sync path as we navigate up the directory tree. + path = filepath.Join(filepath.Base(root), path) + + // Move up the directory tree. + root = dir + } + + return filepath.Clean(root) +} + +func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // Use the bundle root path as the starting point for inferring the sync root path. + bundleRootPath := filepath.Clean(b.RootPath) + + // Infer the sync root path by looking at each one of the sync paths. + // Every sync path must be a descendant of the final sync root path. + syncRootPath := bundleRootPath + for _, path := range b.Config.Sync.Paths { + computedPath := m.computeRoot(path, bundleRootPath) + if computedPath == "" { + continue + } + + // Update sync root path if the computed root path is an ancestor of the current sync root path. + if len(computedPath) < len(syncRootPath) { + syncRootPath = computedPath + } + } + + // The new sync root path can only be an ancestor of the previous root path. + // Compute the relative path from the sync root to the bundle root. + rel, err := filepath.Rel(syncRootPath, bundleRootPath) + if err != nil { + return diag.FromErr(err) + } + + // If during computation of the sync root path we hit the root of the filesystem, + // then one or more of the sync paths are outside the filesystem. + // Check if this happened by verifying that none of the paths escape the root + // when joined with the sync root path. + for i, path := range b.Config.Sync.Paths { + if filepath.IsLocal(filepath.Join(rel, path)) { + continue + } + + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("invalid sync path %q", path), + Locations: b.Config.GetLocations(fmt.Sprintf("sync.paths[%d]", i)), + Paths: []dyn.Path{dyn.NewPath(dyn.Key("sync"), dyn.Key("paths"), dyn.Index(i))}, + }) + } + + if diags.HasError() { + return diags + } + + // Update all paths in the sync configuration to be relative to the sync root. + for i, p := range b.Config.Sync.Paths { + b.Config.Sync.Paths[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Include { + b.Config.Sync.Include[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Exclude { + b.Config.Sync.Exclude[i] = filepath.Join(rel, p) + } + + // Configure the sync root path. 
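+	// SyncRoot is the virtual filesystem rooted at the sync root path and is
+	// used to resolve local files (e.g. by TranslatePaths), while SyncRootPath
+	// is the string form used for relative path computations.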
+ b.SyncRoot = vfs.MustNew(syncRootPath) + b.SyncRootPath = syncRootPath + return nil +} diff --git a/bundle/config/mutator/sync_infer_root_internal_test.go b/bundle/config/mutator/sync_infer_root_internal_test.go new file mode 100644 index 00000000..9ab9c88f --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_internal_test.go @@ -0,0 +1,72 @@ +package mutator + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSyncInferRootInternal_ComputeRoot(t *testing.T) { + s := syncInferRoot{} + + tcases := []struct { + path string + root string + out string + }{ + { + // Test that "." doesn't change the root. + path: ".", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + // Test that a subdirectory doesn't change the root. + path: "sub", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + // Test that a parent directory changes the root. + path: "../common", + root: "/tmp/some/dir", + out: "/tmp/some", + }, + { + // Test that a deeply nested parent directory changes the root. + path: "../../../../../../common", + root: "/tmp/some/dir/that/is/very/deeply/nested", + out: "/tmp/some", + }, + { + // Test that a parent directory changes the root at the filesystem root boundary. + path: "../common", + root: "/tmp", + out: "/", + }, + { + // Test that an invalid parent directory doesn't change the root and returns an empty string. + path: "../common", + root: "/", + out: "", + }, + { + // Test that the returned path is cleaned even if the root doesn't change. + path: "sub", + root: "/tmp/some/../dir", + out: "/tmp/dir", + }, + { + // Test that a relative root path also works. + path: "../common", + root: "foo/bar", + out: "foo", + }, + } + + for _, tc := range tcases { + out := s.computeRoot(tc.path, tc.root) + assert.Equal(t, tc.out, filepath.ToSlash(out)) + } +} diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go new file mode 100644 index 00000000..383e5676 --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -0,0 +1,198 @@ +package mutator_test + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncInferRoot_NominalAbsolute(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some/dir"), b.SyncRootPath) + + // Check that the paths are unchanged. 
+ assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_NominalRelative(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "./some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("some/dir"), b.SyncRootPath) + + // Check that the paths are unchanged. + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ParentDirectory(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{filepath.FromSlash("dir/foo"), filepath.FromSlash("dir/bar")}, b.Config.Sync.Include) + assert.Equal(t, []string{filepath.FromSlash("dir/baz"), filepath.FromSlash("dir/qux")}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ManyParentDirectories(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir/that/is/very/deeply/nested", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../../../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/foo"), + filepath.FromSlash("dir/that/is/very/deeply/nested/bar"), + }, b.Config.Sync.Include) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/baz"), + filepath.FromSlash("dir/that/is/very/deeply/nested/qux"), + }, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_MultiplePaths(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/bundle/root", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "./foo", + "../common", + "./bar", + "../../baz", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) + + // Check that the paths are updated. 
+ assert.Equal(t, filepath.FromSlash("bundle/root/foo"), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.FromSlash("bundle/common"), b.Config.Sync.Paths[1]) + assert.Equal(t, filepath.FromSlash("bundle/root/bar"), b.Config.Sync.Paths[2]) + assert.Equal(t, filepath.FromSlash("baz"), b.Config.Sync.Paths[3]) +} + +func TestSyncInferRoot_Error(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../error", + "../../../thisworks", + "../../../../../error", + }, + }, + }, + } + + bundletest.SetLocation(b, "sync.paths", "databricks.yml") + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + require.Len(t, diags, 2) + assert.Equal(t, `invalid sync path "../../../../error"`, diags[0].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[0].Locations[0].String()) + assert.Equal(t, "sync.paths[0]", diags[0].Paths[0].String()) + assert.Equal(t, `invalid sync path "../../../../../error"`, diags[1].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[1].Locations[0].String()) + assert.Equal(t, "sync.paths[2]", diags[1].Paths[0].String()) +} diff --git a/bundle/config/mutator/trampoline.go b/bundle/config/mutator/trampoline.go index dde9a299..dcca5014 100644 --- a/bundle/config/mutator/trampoline.go +++ b/bundle/config/mutator/trampoline.go @@ -82,7 +82,7 @@ func (m *trampoline) generateNotebookWrapper(ctx context.Context, b *bundle.Bund return err } - internalDirRel, err := filepath.Rel(b.RootPath, internalDir) + internalDirRel, err := filepath.Rel(b.SyncRootPath, internalDir) if err != nil { return err } diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go index de395c16..08d3c822 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/config/mutator/trampoline_test.go @@ -56,8 +56,12 @@ func TestGenerateTrampoline(t *testing.T) { } b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Bundle: config.Bundle{ Target: "development", }, @@ -89,6 +93,6 @@ func TestGenerateTrampoline(t *testing.T) { require.Equal(t, "Hello from Trampoline", string(bytes)) task := b.Config.Resources.Jobs["test"].Tasks[0] - require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_test_to_trampoline") + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_test_to_trampoline", task.NotebookTask.NotebookPath) require.Nil(t, task.PythonWheelTask) } diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 28f7d3d3..5f22570e 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -93,14 +93,14 @@ func (t *translateContext) rewritePath( return nil } - // Local path must be contained in the bundle root. + // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. 
- localRelPath, err := filepath.Rel(t.b.RootPath, localPath) + localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { return err } if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in bundle root path", localPath) + return fmt.Errorf("path %s is not contained in sync root path", localPath) } // Prefix remote path with its remote root path. @@ -118,7 +118,7 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("notebook %s not found", literal) } @@ -134,7 +134,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe } func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } @@ -148,7 +148,7 @@ func (t *translateContext) translateFilePath(literal, localFullPath, localRelPat } func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.BundleRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) if err != nil { return "", err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index fd64593b..50fcd3b0 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -41,8 +41,8 @@ func touchEmptyFile(t *testing.T, path string) { func TestTranslatePathsSkippedWithGitSource(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -113,8 +113,8 @@ func TestTranslatePaths(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "requirements.txt")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -289,8 +289,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "job", "my_dbt_project", "dbt_project.yml")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -380,12 +380,12 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) } -func TestTranslatePathsOutsideBundleRoot(t *testing.T) { +func TestTranslatePathsOutsideSyncRoot(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -411,15 +411,15 @@ func TestTranslatePathsOutsideBundleRoot(t *testing.T) { bundletest.SetLocation(b, ".", filepath.Join(dir, "../resource.yml")) diags := 
bundle.Apply(context.Background(), b, mutator.TranslatePaths()) - assert.ErrorContains(t, diags.Error(), "is not contained in bundle root") + assert.ErrorContains(t, diags.Error(), "is not contained in sync root path") } func TestJobNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -449,8 +449,8 @@ func TestJobFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -480,8 +480,8 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -511,8 +511,8 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -543,8 +543,8 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -578,8 +578,8 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -613,8 +613,8 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -648,8 +648,8 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -684,8 +684,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "env2.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -724,8 +724,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { func TestTranslatePathWithComplexVariables(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Variables: map[string]*variable.Variable{ "cluster_libraries": { diff --git a/bundle/config/sync.go b/bundle/config/sync.go index 0580e4c4..377b1333 100644 --- a/bundle/config/sync.go +++ 
b/bundle/config/sync.go @@ -1,6 +1,10 @@ package config type Sync struct { + // Paths contains a list of paths to synchronize relative to the bundle root path. + // If not configured, this defaults to synchronizing everything in the bundle root path (i.e. `.`). + Paths []string `json:"paths,omitempty"` + // Include contains a list of globs evaluated relative to the bundle root path // to explicitly include files that were excluded by the user's gitignore. Include []string `json:"include,omitempty"` diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index dc45053f..347ed307 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -28,8 +28,8 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalRoot: rb.BundleRoot(), - Paths: []string{"."}, + LocalRoot: rb.SyncRoot(), + Paths: rb.Config().Sync.Paths, Include: includes, Exclude: rb.Config().Sync.Exclude, diff --git a/bundle/deploy/state_pull.go b/bundle/deploy/state_pull.go index 24ed9d36..5e301a6f 100644 --- a/bundle/deploy/state_pull.go +++ b/bundle/deploy/state_pull.go @@ -85,7 +85,7 @@ func (s *statePull) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Creating new snapshot") - snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.BundleRoot), opts) + snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.SyncRoot), opts) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index 38f0b402..f7519306 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -64,6 +64,10 @@ func testStatePull(t *testing.T, opts statePullOpts) { b := &bundle.Bundle{ RootPath: tmpDir, BundleRoot: vfs.MustNew(tmpDir), + + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ Bundle: config.Bundle{ Target: "default", @@ -81,11 +85,11 @@ func testStatePull(t *testing.T, opts statePullOpts) { ctx := context.Background() for _, file := range opts.localFiles { - testutil.Touch(t, b.RootPath, "bar", file) + testutil.Touch(t, b.SyncRootPath, "bar", file) } for _, file := range opts.localNotebooks { - testutil.TouchNotebook(t, b.RootPath, "bar", file) + testutil.TouchNotebook(t, b.SyncRootPath, "bar", file) } if opts.withExistingSnapshot { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 7a1081de..8039a4f1 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -21,7 +21,18 @@ func Initialize() bundle.Mutator { "initialize", []bundle.Mutator{ validate.AllResourcesHaveValues(), + + // Update all path fields in the sync block to be relative to the bundle root path. mutator.RewriteSyncPaths(), + + // Configure the default sync path to equal the bundle root if not explicitly configured. + // By default, this means all files in the bundle root directory are synchronized. + mutator.SyncDefaultPath(), + + // Figure out if the sync root path is identical or an ancestor of the bundle root path. + // If it is an ancestor, this updates all paths to be relative to the sync root path. 
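+			// Note: SyncInferRoot assumes all sync paths are already relative to
+			// the bundle root, so it must run after RewriteSyncPaths (and after
+			// SyncDefaultPath, which fills in the default path).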
+ mutator.SyncInferRoot(), + mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/python/conditional_transform_test.go b/bundle/python/conditional_transform_test.go index 677970d7..1d397f7a 100644 --- a/bundle/python/conditional_transform_test.go +++ b/bundle/python/conditional_transform_test.go @@ -2,7 +2,6 @@ package python import ( "context" - "path" "path/filepath" "testing" @@ -18,11 +17,15 @@ func TestNoTransformByDefault(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -63,11 +66,15 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -102,14 +109,7 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { task := b.Config.Resources.Jobs["job1"].Tasks[0] require.Nil(t, task.PythonWheelTask) require.NotNil(t, task.NotebookTask) - - dir, err := b.InternalDir(context.Background()) - require.NoError(t, err) - - internalDirRel, err := filepath.Rel(b.RootPath, dir) - require.NoError(t, err) - - require.Equal(t, path.Join(filepath.ToSlash(internalDirRel), "notebook_job1_key1"), task.NotebookTask.NotebookPath) + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_job1_key1", task.NotebookTask.NotebookPath) require.Len(t, task.Libraries, 1) require.Equal(t, "/Workspace/Users/test@test.com/bundle/dist/test.jar", task.Libraries[0].Jar) diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 848132a1..5c48d81c 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -40,6 +40,8 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { diags := bundle.Apply(ctx, b, bundle.Seq( phases.LoadNamedTarget(env), mutator.RewriteSyncPaths(), + mutator.SyncDefaultPath(), + mutator.SyncInferRoot(), mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/tests/sync/paths/databricks.yml b/bundle/tests/sync/paths/databricks.yml new file mode 100644 index 00000000..9ef6fa03 --- /dev/null +++ b/bundle/tests/sync/paths/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - src + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging diff --git a/bundle/tests/sync/paths_no_root/databricks.yml b/bundle/tests/sync/paths_no_root/databricks.yml new file mode 100644 index 00000000..df15b12b --- /dev/null +++ b/bundle/tests/sync/paths_no_root/databricks.yml @@ -0,0 +1,26 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging + + undefined: ~ + + nil: + sync: + paths: ~ + + empty: + sync: + paths: [] diff --git 
a/bundle/tests/sync/shared_code/bundle/databricks.yml b/bundle/tests/sync/shared_code/bundle/databricks.yml new file mode 100644 index 00000000..738b6170 --- /dev/null +++ b/bundle/tests/sync/shared_code/bundle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: shared_code + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - "../common" + - "." diff --git a/bundle/tests/sync/shared_code/common/library.txt b/bundle/tests/sync/shared_code/common/library.txt new file mode 100644 index 00000000..83b32384 --- /dev/null +++ b/bundle/tests/sync/shared_code/common/library.txt @@ -0,0 +1 @@ +Placeholder for files to be deployed as part of multiple bundles. diff --git a/bundle/tests/sync_test.go b/bundle/tests/sync_test.go index d08e889c..15644b67 100644 --- a/bundle/tests/sync_test.go +++ b/bundle/tests/sync_test.go @@ -12,14 +12,20 @@ func TestSyncOverride(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override", "development") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "staging") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "prod") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -28,14 +34,20 @@ func TestSyncOverrideNoRootSync(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "prod") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -44,10 +56,14 @@ func TestSyncNil(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil", "development") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) 
assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } @@ -56,10 +72,59 @@ func TestSyncNilRoot(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil_root", "development") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } + +func TestSyncPaths(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths", "development") + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths", "staging") + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "staging"}, b.Config.Sync.Paths) +} + +func TestSyncPathsNoRoot(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"staging"}, b.Config.Sync.Paths) + + // If not set at all, it defaults to "." + b = loadTarget(t, "./sync/paths_no_root", "undefined") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + + // If set to nil, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "nil") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Len(t, b.Config.Sync.Paths, 0) + + // If set to an empty sequence, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "empty") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) + assert.Len(t, b.Config.Sync.Paths, 0) +} + +func TestSyncSharedCode(t *testing.T) { + b := loadTarget(t, "./sync/shared_code/bundle", "default") + assert.Equal(t, filepath.FromSlash("sync/shared_code"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"common", "bundle"}, b.Config.Sync.Paths) +} diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index 0d0c5738..bd03eec9 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -17,8 +17,10 @@ import ( func TestSyncOptionsFromBundle(t *testing.T) { tempDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tempDir, - BundleRoot: vfs.MustNew(tempDir), + RootPath: tempDir, + BundleRoot: vfs.MustNew(tempDir), + SyncRootPath: tempDir, + SyncRoot: vfs.MustNew(tempDir), Config: config.Root{ Bundle: config.Bundle{ Target: "default", From 35e48be81c634e50164edcfb086c362e948ca57e Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 22 Aug 2024 10:44:22 +0200 Subject: [PATCH 10/11] [Release] Release v0.227.0 (#1705) CLI: * Added filtering flags for cluster list commands ([#1703](https://github.com/databricks/cli/pull/1703)). 
Bundles: * Remove reference to "dbt" in the default-sql template ([#1696](https://github.com/databricks/cli/pull/1696)). * Pause continuous pipelines when 'mode: development' is used ([#1590](https://github.com/databricks/cli/pull/1590)). * Add configurable presets for name prefixes, tags, etc. ([#1490](https://github.com/databricks/cli/pull/1490)). * Report all empty resources present in error diagnostic ([#1685](https://github.com/databricks/cli/pull/1685)). * Improves detection of PyPI package names in environment dependencies ([#1699](https://github.com/databricks/cli/pull/1699)). * [DAB] Add support for requirements libraries in Job Tasks ([#1543](https://github.com/databricks/cli/pull/1543)). * Add paths field to bundle sync configuration ([#1694](https://github.com/databricks/cli/pull/1694)). Internal: * Add `import` option for PyDABs ([#1693](https://github.com/databricks/cli/pull/1693)). * Make fileset take optional list of paths to list ([#1684](https://github.com/databricks/cli/pull/1684)). * Pass through paths argument to libs/sync ([#1689](https://github.com/databricks/cli/pull/1689)). * Correctly mark package names with versions as remote libraries ([#1697](https://github.com/databricks/cli/pull/1697)). * Share test initializer in common helper function ([#1695](https://github.com/databricks/cli/pull/1695)). * Make `pydabs/venv_path` optional ([#1687](https://github.com/databricks/cli/pull/1687)). * Use API mocks for duplicate path errors in workspace files extensions client ([#1690](https://github.com/databricks/cli/pull/1690)). * Fix prefix preset used for UC schemas ([#1704](https://github.com/databricks/cli/pull/1704)). --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39960e30..88a62d09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Version changelog +## [Release] Release v0.227.0 + +CLI: + * Added filtering flags for cluster list commands ([#1703](https://github.com/databricks/cli/pull/1703)). + +Bundles: + * Allow users to configure paths (including outside of the bundle root) to synchronize to the workspace. ([#1694](https://github.com/databricks/cli/pull/1694)). + * Add configurable presets for name prefixes, tags, etc. ([#1490](https://github.com/databricks/cli/pull/1490)). + * Add support for requirements libraries in Job Tasks ([#1543](https://github.com/databricks/cli/pull/1543)). + * Remove reference to "dbt" in the default-sql template ([#1696](https://github.com/databricks/cli/pull/1696)). + * Pause continuous pipelines when 'mode: development' is used ([#1590](https://github.com/databricks/cli/pull/1590)). + * Report all empty resources present in error diagnostic ([#1685](https://github.com/databricks/cli/pull/1685)). + * Improves detection of PyPI package names in environment dependencies ([#1699](https://github.com/databricks/cli/pull/1699)). + +Internal: + * Add `import` option for PyDABs ([#1693](https://github.com/databricks/cli/pull/1693)). + * Make fileset take optional list of paths to list ([#1684](https://github.com/databricks/cli/pull/1684)). + * Pass through paths argument to libs/sync ([#1689](https://github.com/databricks/cli/pull/1689)). + * Correctly mark package names with versions as remote libraries ([#1697](https://github.com/databricks/cli/pull/1697)). + * Share test initializer in common helper function ([#1695](https://github.com/databricks/cli/pull/1695)). 
+ * Make `pydabs/venv_path` optional ([#1687](https://github.com/databricks/cli/pull/1687)). + * Use API mocks for duplicate path errors in workspace files extensions client ([#1690](https://github.com/databricks/cli/pull/1690)). + * Fix prefix preset used for UC schemas ([#1704](https://github.com/databricks/cli/pull/1704)). + + + ## [Release] Release v0.226.0 CLI: From 7fe08c2386edfa503985d93b1e6f633aa85e4f74 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 22 Aug 2024 20:34:26 +0530 Subject: [PATCH 11/11] Revert hc-install version to 0.7.0 (#1711) ## Changes With hc-install version `0.8.0` there was a regression where debug logs would be leaked into stderr. Reported upstream in https://github.com/hashicorp/hc-install/issues/239. Meanwhile we need to revert and pin to version`0.7.0`. This PR also includes a regression test. ## Tests Regression test. --- go.mod | 3 +-- go.sum | 8 ++------ internal/bundle/deploy_test.go | 31 +++++++++++++++++++++++++++++++ internal/bundle/helpers.go | 29 +++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 1457a4d6..838a45f3 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 - github.com/hashicorp/hc-install v0.8.0 // MPL 2.0 + github.com/hashicorp/hc-install v0.7.0 // MPL 2.0 github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 github.com/hashicorp/terraform-json v0.22.1 // MPL 2.0 github.com/manifoldco/promptui v0.9.0 // BSD-3-Clause @@ -49,7 +49,6 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect - github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect diff --git a/go.sum b/go.sum index b2985955..f55f329f 100644 --- a/go.sum +++ b/go.sum @@ -99,14 +99,10 @@ github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= -github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= -github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= -github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/hc-install v0.8.0 h1:LdpZeXkZYMQhoKPCecJHlKvUkQFixN/nvyR1CdfOLjI= -github.com/hashicorp/hc-install v0.8.0/go.mod h1:+MwJYjDfCruSD/udvBmRB22Nlkwwkwf5sAB6uTIhSaU= +github.com/hashicorp/hc-install v0.7.0 h1:Uu9edVqjKQxxuD28mR5TikkKDd/p55S8vzPC1659aBk= +github.com/hashicorp/hc-install v0.7.0/go.mod h1:ELmmzZlGnEcqoUMKUuykHaPCIR1sYLYX+KSggWSKZuA= github.com/hashicorp/terraform-exec v0.21.0 
h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= github.com/hashicorp/terraform-json v0.22.1 h1:xft84GZR0QzjPVWs4lRUwvTcPnegqlyS7orfb5Ltvec= diff --git a/internal/bundle/deploy_test.go b/internal/bundle/deploy_test.go index 3da88570..269b7c80 100644 --- a/internal/bundle/deploy_test.go +++ b/internal/bundle/deploy_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal" "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/libs/env" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/catalog" @@ -123,3 +124,33 @@ func TestAccBundleDeployUcSchemaFailsWithoutAutoApprove(t *testing.T) { assert.EqualError(t, err, root.ErrAlreadyPrinted.Error()) assert.Contains(t, stdout.String(), "the deployment requires destructive actions, but current console does not support prompting. Please specify --auto-approve if you would like to skip prompts and proceed") } + +func TestAccDeployBasicBundleLogs(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + root, err := initTestTemplate(t, ctx, "basic", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + t.Cleanup(func() { + err = destroyBundle(t, ctx, root) + require.NoError(t, err) + }) + + currentUser, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + stdout, stderr := blackBoxRun(t, root, "bundle", "deploy") + assert.Equal(t, strings.Join([]string{ + fmt.Sprintf("Uploading bundle files to /Users/%s/.bundle/%s/files...", currentUser.UserName, uniqueId), + "Deploying resources...", + "Updating deployment state...", + "Deployment complete!\n", + }, "\n"), stderr) + assert.Equal(t, "", stdout) +} diff --git a/internal/bundle/helpers.go b/internal/bundle/helpers.go index 03d9cff7..3547c175 100644 --- a/internal/bundle/helpers.go +++ b/internal/bundle/helpers.go @@ -1,10 +1,12 @@ package bundle import ( + "bytes" "context" "encoding/json" "fmt" "os" + "os/exec" "path/filepath" "strings" "testing" @@ -15,6 +17,7 @@ import ( "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/libs/template" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go" "github.com/stretchr/testify/require" ) @@ -114,3 +117,29 @@ func getBundleRemoteRootPath(w *databricks.WorkspaceClient, t *testing.T, unique root := fmt.Sprintf("/Users/%s/.bundle/%s", me.UserName, uniqueId) return root } + +func blackBoxRun(t *testing.T, root string, args ...string) (stdout string, stderr string) { + cwd := vfs.MustNew(".") + gitRoot, err := vfs.FindLeafInTree(cwd, ".git") + require.NoError(t, err) + + t.Setenv("BUNDLE_ROOT", root) + + // Create the command + cmd := exec.Command("go", append([]string{"run", "main.go"}, args...)...) + cmd.Dir = gitRoot.Native() + + // Create buffers to capture output + var outBuffer, errBuffer bytes.Buffer + cmd.Stdout = &outBuffer + cmd.Stderr = &errBuffer + + // Run the command + err = cmd.Run() + require.NoError(t, err) + + // Get the output + stdout = outBuffer.String() + stderr = errBuffer.String() + return +}
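
As an illustration of how the sync root inference in this series behaves end to end, the following minimal sketch applies the SyncInferRoot mutator to a bundle whose sync paths reach outside the bundle root, mirroring the shared_code fixture above. The import paths and exported fields (RootPath, SyncRootPath, Config.Sync.Paths) are taken from the diffs and assumed to be importable as shown; treat this as a sketch rather than a definitive usage example.

```go
package main

import (
	"context"
	"fmt"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/bundle/config"
	"github.com/databricks/cli/bundle/config/mutator"
)

func main() {
	// A bundle rooted one level below the code it shares with other bundles.
	b := &bundle.Bundle{
		RootPath: "/tmp/shared_code/bundle",
		Config: config.Root{
			Sync: config.Sync{
				Paths: []string{"../common", "."},
			},
		},
	}

	// "../common" escapes the bundle root, so the inferred sync root moves up
	// one level and all sync paths are rewritten relative to it.
	diags := bundle.Apply(context.Background(), b, mutator.SyncInferRoot())
	if err := diags.Error(); err != nil {
		panic(err)
	}

	fmt.Println(b.SyncRootPath)      // /tmp/shared_code
	fmt.Println(b.Config.Sync.Paths) // [common bundle]
}
```

On Windows the printed paths use backslashes, since the mutator operates on filepath-cleaned values; this is why the tests above compare against filepath.FromSlash values.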