From f9260125aa7e2c7ffc88e5c5853404d11a5884c4 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 23 Jun 2023 15:08:22 +0200 Subject: [PATCH 01/10] Remove extra call to filer.Stat in dbfs filer.Read (#515) ## Changes This PR removes the stat call and instead relies on errors returned by the go SDK to return the appropriate errors ## Tests Tested using existing filer integration tests --- libs/filer/dbfs_client.go | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 85f87ff5..7e59638a 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -8,6 +8,7 @@ import ( "net/http" "path" "sort" + "strings" "time" "github.com/databricks/databricks-sdk-go" @@ -145,22 +146,25 @@ func (w *DbfsClient) Read(ctx context.Context, name string) (io.ReadCloser, erro return nil, err } - // This stat call serves two purposes: - // 1. Checks file at path exists, and throws an error if it does not - // 2. Allows us to error out if the path is a directory. This is needed - // because the Dbfs.Open method on the SDK does not error when the path is - // a directory - // TODO(added 8 June 2023): remove this stat call on go sdk bump. https://github.com/databricks/cli/issues/450 - stat, err := w.Stat(ctx, name) - if err != nil { - return nil, err - } - if stat.IsDir() { - return nil, NotAFile{absPath} - } - handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) if err != nil { + // Return error if file is a directory + if strings.Contains(err.Error(), "cannot open directory for reading") { + return nil, NotAFile{absPath} + } + + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return nil, FileDoesNotExistError{absPath} + } + } + return nil, err } From d0e9953ad9398b2c7ef58578c9a6be0b42cfbadb Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 23 Jun 2023 15:17:39 +0200 Subject: [PATCH 02/10] Use direct download for workspace filer read (#514) ## Changes Use the download method from the SDK in the read method for the WSFS implementation of the filer interface. Closes #452. ## Tests Tested by existing integration tests --- libs/filer/workspace_files_client.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 2b5e718b..12a536bf 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -3,7 +3,6 @@ package filer import ( "bytes" "context" - "encoding/base64" "errors" "fmt" "io" @@ -187,18 +186,7 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCl // Export file contents. Note the /workspace/export API has a limit of 10MBs // for the file size - // TODO: use direct download once it's fixed. see: https://github.com/databricks/cli/issues/452 - res, err := w.workspaceClient.Workspace.Export(ctx, workspace.ExportRequest{ - Path: absPath, - }) - if err != nil { - return nil, err - } - b, err := base64.StdEncoding.DecodeString(res.Content) - if err != nil { - return nil, err - } - return io.NopCloser(bytes.NewReader(b)), nil + return w.workspaceClient.Workspace.Download(ctx, absPath) } func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { From 30efe91c6dbda7f858d1b1b15d25647c06e555d5 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:07:09 +0200 Subject: [PATCH 03/10] Make local files default for fs commands (#506) ## Changes ## Tests --- cmd/fs/helpers.go | 63 ++++---- cmd/fs/helpers_test.go | 58 +++++-- internal/fs_cp_test.go | 13 +- libs/filer/dbfs_client.go | 4 +- libs/filer/files_client.go | 4 +- libs/filer/local_client.go | 4 +- libs/filer/local_root_path.go | 27 ++++ libs/filer/local_root_path_test.go | 142 ++++++++++++++++++ libs/filer/workspace_files_client.go | 4 +- .../{root_path.go => workspace_root_path.go} | 12 +- ...th_test.go => workspace_root_path_test.go} | 2 +- 11 files changed, 261 insertions(+), 72 deletions(-) create mode 100644 libs/filer/local_root_path.go create mode 100644 libs/filer/local_root_path_test.go rename libs/filer/{root_path.go => workspace_root_path.go} (65%) rename libs/filer/{root_path_test.go => workspace_root_path_test.go} (98%) diff --git a/cmd/fs/helpers.go b/cmd/fs/helpers.go index aecee9e0..ca85cf1b 100644 --- a/cmd/fs/helpers.go +++ b/cmd/fs/helpers.go @@ -10,47 +10,38 @@ import ( "github.com/databricks/cli/libs/filer" ) -type Scheme string - -const ( - DbfsScheme = Scheme("dbfs") - LocalScheme = Scheme("file") - NoScheme = Scheme("") -) - func filerForPath(ctx context.Context, fullPath string) (filer.Filer, string, error) { - parts := strings.SplitN(fullPath, ":/", 2) + // Split path at : to detect any file schemes + parts := strings.SplitN(fullPath, ":", 2) + + // If no scheme is specified, then local path if len(parts) < 2 { - return nil, "", fmt.Errorf(`no scheme specified for path %s. Please specify scheme "dbfs" or "file". Example: file:/foo/bar or file:/c:/foo/bar`, fullPath) + f, err := filer.NewLocalClient("") + return f, fullPath, err } - scheme := Scheme(parts[0]) + + // On windows systems, paths start with a drive letter. If the scheme + // is a single letter and the OS is windows, then we conclude the path + // is meant to be a local path. + if runtime.GOOS == "windows" && len(parts[0]) == 1 { + f, err := filer.NewLocalClient("") + return f, fullPath, err + } + + if parts[0] != "dbfs" { + return nil, "", fmt.Errorf("invalid scheme: %s", parts[0]) + } + path := parts[1] - switch scheme { - case DbfsScheme: - w := root.WorkspaceClient(ctx) - // If the specified path has the "Volumes" prefix, use the Files API. - if strings.HasPrefix(path, "Volumes/") { - f, err := filer.NewFilesClient(w, "/") - return f, path, err - } - f, err := filer.NewDbfsClient(w, "/") - return f, path, err + w := root.WorkspaceClient(ctx) - case LocalScheme: - if runtime.GOOS == "windows" { - parts := strings.SplitN(path, ":", 2) - if len(parts) < 2 { - return nil, "", fmt.Errorf("no volume specfied for path: %s", path) - } - volume := parts[0] + ":" - relPath := parts[1] - f, err := filer.NewLocalClient(volume) - return f, relPath, err - } - f, err := filer.NewLocalClient("/") + // If the specified path has the "Volumes" prefix, use the Files API. + if strings.HasPrefix(path, "/Volumes/") { + f, err := filer.NewFilesClient(w, "/") return f, path, err - - default: - return nil, "", fmt.Errorf(`unsupported scheme %s specified for path %s. Please specify scheme "dbfs" or "file". Example: file:/foo/bar or file:/c:/foo/bar`, scheme, fullPath) } + + // The file is a dbfs file, and uses the DBFS APIs + f, err := filer.NewDbfsClient(w, "/") + return f, path, err } diff --git a/cmd/fs/helpers_test.go b/cmd/fs/helpers_test.go index 4beda6ca..d86bd46e 100644 --- a/cmd/fs/helpers_test.go +++ b/cmd/fs/helpers_test.go @@ -5,22 +5,58 @@ import ( "runtime" "testing" + "github.com/databricks/cli/libs/filer" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestNotSpecifyingVolumeForWindowsPathErrors(t *testing.T) { +func TestFilerForPathForLocalPaths(t *testing.T) { + tmpDir := t.TempDir() + ctx := context.Background() + + f, path, err := filerForPath(ctx, tmpDir) + assert.NoError(t, err) + assert.Equal(t, tmpDir, path) + + info, err := f.Stat(ctx, path) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestFilerForPathForInvalidScheme(t *testing.T) { + ctx := context.Background() + + _, _, err := filerForPath(ctx, "dbf:/a") + assert.ErrorContains(t, err, "invalid scheme") + + _, _, err = filerForPath(ctx, "foo:a") + assert.ErrorContains(t, err, "invalid scheme") + + _, _, err = filerForPath(ctx, "file:/a") + assert.ErrorContains(t, err, "invalid scheme") +} + +func testWindowsFilerForPath(t *testing.T, ctx context.Context, fullPath string) { + f, path, err := filerForPath(ctx, fullPath) + assert.NoError(t, err) + + // Assert path remains unchanged + assert.Equal(t, path, fullPath) + + // Assert local client is created + _, ok := f.(*filer.LocalClient) + assert.True(t, ok) +} + +func TestFilerForWindowsLocalPaths(t *testing.T) { if runtime.GOOS != "windows" { - t.Skip() + t.SkipNow() } ctx := context.Background() - pathWithVolume := `file:/c:/foo/bar` - pathWOVolume := `file:/uno/dos` - - _, path, err := filerForPath(ctx, pathWithVolume) - assert.NoError(t, err) - assert.Equal(t, `/foo/bar`, path) - - _, _, err = filerForPath(ctx, pathWOVolume) - assert.Equal(t, "no volume specfied for path: uno/dos", err.Error()) + testWindowsFilerForPath(t, ctx, `c:\abc`) + testWindowsFilerForPath(t, ctx, `c:abc`) + testWindowsFilerForPath(t, ctx, `d:\abc`) + testWindowsFilerForPath(t, ctx, `d:\abc`) + testWindowsFilerForPath(t, ctx, `f:\abc\ef`) } diff --git a/internal/fs_cp_test.go b/internal/fs_cp_test.go index c9171086..766d6a59 100644 --- a/internal/fs_cp_test.go +++ b/internal/fs_cp_test.go @@ -66,7 +66,7 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { f, err := filer.NewLocalClient(tmp) require.NoError(t, err) - return f, path.Join("file:/", filepath.ToSlash(tmp)) + return f, path.Join(filepath.ToSlash(tmp)) } func setupDbfsFiler(t *testing.T) (filer.Filer, string) { @@ -259,21 +259,14 @@ func TestAccFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag(t *testing.T) { tmpDir := temporaryDbfsDir(t, w) _, _, err = RequireErrorRun(t, "fs", "cp", "dbfs:"+tmpDir, "dbfs:/tmp") - assert.Equal(t, fmt.Sprintf("source path %s is a directory. Please specify the --recursive flag", strings.TrimPrefix(tmpDir, "/")), err.Error()) -} - -func TestAccFsCpErrorsOnNoScheme(t *testing.T) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) - - _, _, err := RequireErrorRun(t, "fs", "cp", "/a", "/b") - assert.Equal(t, "no scheme specified for path /a. Please specify scheme \"dbfs\" or \"file\". Example: file:/foo/bar or file:/c:/foo/bar", err.Error()) + assert.Equal(t, fmt.Sprintf("source path %s is a directory. Please specify the --recursive flag", tmpDir), err.Error()) } func TestAccFsCpErrorsOnInvalidScheme(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) _, _, err := RequireErrorRun(t, "fs", "cp", "dbfs:/a", "https:/b") - assert.Equal(t, "unsupported scheme https specified for path https:/b. Please specify scheme \"dbfs\" or \"file\". Example: file:/foo/bar or file:/c:/foo/bar", err.Error()) + assert.Equal(t, "invalid scheme: https", err.Error()) } func TestAccFsCpSourceIsDirectoryButTargetIsFile(t *testing.T) { diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 7e59638a..64eb4b77 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -68,14 +68,14 @@ type DbfsClient struct { workspaceClient *databricks.WorkspaceClient // File operations will be relative to this path. - root RootPath + root WorkspaceRootPath } func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) { return &DbfsClient{ workspaceClient: w, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 6c1f5a97..ee7587dc 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -60,7 +60,7 @@ type FilesClient struct { apiClient *client.DatabricksClient // File operations will be relative to this path. - root RootPath + root WorkspaceRootPath } func filesNotImplementedError(fn string) error { @@ -77,7 +77,7 @@ func NewFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { workspaceClient: w, apiClient: apiClient, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } diff --git a/libs/filer/local_client.go b/libs/filer/local_client.go index 8df59d25..8d960c84 100644 --- a/libs/filer/local_client.go +++ b/libs/filer/local_client.go @@ -13,12 +13,12 @@ import ( // LocalClient implements the [Filer] interface for the local filesystem. type LocalClient struct { // File operations will be relative to this path. - root RootPath + root localRootPath } func NewLocalClient(root string) (Filer, error) { return &LocalClient{ - root: NewRootPath(root), + root: NewLocalRootPath(root), }, nil } diff --git a/libs/filer/local_root_path.go b/libs/filer/local_root_path.go new file mode 100644 index 00000000..15a54263 --- /dev/null +++ b/libs/filer/local_root_path.go @@ -0,0 +1,27 @@ +package filer + +import ( + "fmt" + "path/filepath" + "strings" +) + +type localRootPath struct { + rootPath string +} + +func NewLocalRootPath(root string) localRootPath { + if root == "" { + return localRootPath{""} + } + return localRootPath{filepath.Clean(root)} +} + +func (rp *localRootPath) Join(name string) (string, error) { + absPath := filepath.Join(rp.rootPath, name) + + if !strings.HasPrefix(absPath, rp.rootPath) { + return "", fmt.Errorf("relative path escapes root: %s", name) + } + return absPath, nil +} diff --git a/libs/filer/local_root_path_test.go b/libs/filer/local_root_path_test.go new file mode 100644 index 00000000..1a39c446 --- /dev/null +++ b/libs/filer/local_root_path_test.go @@ -0,0 +1,142 @@ +package filer + +import ( + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" +) + +func testUnixLocalRootPath(t *testing.T, uncleanRoot string) { + cleanRoot := filepath.Clean(uncleanRoot) + rp := NewLocalRootPath(uncleanRoot) + + remotePath, err := rp.Join("a/b/c") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/c", remotePath) + + remotePath, err = rp.Join("a/b/../d") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/d", remotePath) + + remotePath, err = rp.Join("a/../c") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/c", remotePath) + + remotePath, err = rp.Join("a/b/c/.") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/c", remotePath) + + remotePath, err = rp.Join("a/b/c/d/./../../f/g") + assert.NoError(t, err) + assert.Equal(t, cleanRoot+"/a/b/f/g", remotePath) + + remotePath, err = rp.Join(".//a/..//./b/..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("a/b/../..") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join(".") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("/") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + _, err = rp.Join("..") + assert.ErrorContains(t, err, `relative path escapes root: ..`) + + _, err = rp.Join("a/../..") + assert.ErrorContains(t, err, `relative path escapes root: a/../..`) + + _, err = rp.Join("./../.") + assert.ErrorContains(t, err, `relative path escapes root: ./../.`) + + _, err = rp.Join("/./.././..") + assert.ErrorContains(t, err, `relative path escapes root: /./.././..`) + + _, err = rp.Join("./../.") + assert.ErrorContains(t, err, `relative path escapes root: ./../.`) + + _, err = rp.Join("./..") + assert.ErrorContains(t, err, `relative path escapes root: ./..`) + + _, err = rp.Join("./../../..") + assert.ErrorContains(t, err, `relative path escapes root: ./../../..`) + + _, err = rp.Join("./../a/./b../../..") + assert.ErrorContains(t, err, `relative path escapes root: ./../a/./b../../..`) + + _, err = rp.Join("../..") + assert.ErrorContains(t, err, `relative path escapes root: ../..`) +} + +func TestUnixLocalRootPath(t *testing.T) { + if runtime.GOOS != "darwin" && runtime.GOOS != "linux" { + t.SkipNow() + } + + testUnixLocalRootPath(t, "/some/root/path") + testUnixLocalRootPath(t, "/some/root/path/") + testUnixLocalRootPath(t, "/some/root/path/.") + testUnixLocalRootPath(t, "/some/root/../path/") +} + +func testWindowsLocalRootPath(t *testing.T, uncleanRoot string) { + cleanRoot := filepath.Clean(uncleanRoot) + rp := NewLocalRootPath(uncleanRoot) + + remotePath, err := rp.Join(`a\b\c`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\b\c`, remotePath) + + remotePath, err = rp.Join(`a\b\..\d`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\d`, remotePath) + + remotePath, err = rp.Join(`a\..\c`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\c`, remotePath) + + remotePath, err = rp.Join(`a\b\c\.`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot+`\a\b\c`, remotePath) + + remotePath, err = rp.Join(`a\b\..\..`) + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join("") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + remotePath, err = rp.Join(".") + assert.NoError(t, err) + assert.Equal(t, cleanRoot, remotePath) + + _, err = rp.Join("..") + assert.ErrorContains(t, err, `relative path escapes root`) + + _, err = rp.Join(`a\..\..`) + assert.ErrorContains(t, err, `relative path escapes root`) +} + +func TestWindowsLocalRootPath(t *testing.T) { + if runtime.GOOS != "windows" { + t.SkipNow() + } + + testWindowsLocalRootPath(t, `c:\some\root\path`) + testWindowsLocalRootPath(t, `c:\some\root\path\`) + testWindowsLocalRootPath(t, `c:\some\root\path\.`) + testWindowsLocalRootPath(t, `C:\some\root\..\path\`) +} diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 12a536bf..db06f91c 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -78,7 +78,7 @@ type WorkspaceFilesClient struct { apiClient *client.DatabricksClient // File operations will be relative to this path. - root RootPath + root WorkspaceRootPath } func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { @@ -91,7 +91,7 @@ func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, workspaceClient: w, apiClient: apiClient, - root: NewRootPath(root), + root: NewWorkspaceRootPath(root), }, nil } diff --git a/libs/filer/root_path.go b/libs/filer/workspace_root_path.go similarity index 65% rename from libs/filer/root_path.go rename to libs/filer/workspace_root_path.go index bdeff5d7..d5163924 100644 --- a/libs/filer/root_path.go +++ b/libs/filer/workspace_root_path.go @@ -6,23 +6,23 @@ import ( "strings" ) -// RootPath can be joined with a relative path and ensures that +// WorkspaceRootPath can be joined with a relative path and ensures that // the returned path is always a strict child of the root path. -type RootPath struct { +type WorkspaceRootPath struct { rootPath string } -// NewRootPath constructs and returns [RootPath]. +// NewWorkspaceRootPath constructs and returns [RootPath]. // The named path is cleaned on construction. -func NewRootPath(name string) RootPath { - return RootPath{ +func NewWorkspaceRootPath(name string) WorkspaceRootPath { + return WorkspaceRootPath{ rootPath: path.Clean(name), } } // Join returns the specified path name joined to the root. // It returns an error if the resulting path is not a strict child of the root path. -func (p *RootPath) Join(name string) (string, error) { +func (p *WorkspaceRootPath) Join(name string) (string, error) { absPath := path.Join(p.rootPath, name) // Don't allow escaping the specified root using relative paths. diff --git a/libs/filer/root_path_test.go b/libs/filer/workspace_root_path_test.go similarity index 98% rename from libs/filer/root_path_test.go rename to libs/filer/workspace_root_path_test.go index 965842d0..73746d27 100644 --- a/libs/filer/root_path_test.go +++ b/libs/filer/workspace_root_path_test.go @@ -9,7 +9,7 @@ import ( func testRootPath(t *testing.T, uncleanRoot string) { cleanRoot := path.Clean(uncleanRoot) - rp := NewRootPath(uncleanRoot) + rp := NewWorkspaceRootPath(uncleanRoot) remotePath, err := rp.Join("a/b/c") assert.NoError(t, err) From cf92698eb3ec21270003d1a1c5e8f27dfdb2deaf Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 27 Jun 2023 01:31:20 +0200 Subject: [PATCH 04/10] Application -> Asset (#529) --- bundle/bundle.go | 2 +- cmd/bundle/root.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index 1ee6dfb6..02d0eaac 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -1,4 +1,4 @@ -// Package bundle is the top level package for Databricks Application Bundles. +// Package bundle is the top level package for Databricks Asset Bundles. // // A bundle is represented by the [Bundle] type. It consists of configuration // and runtime state, such as a client to a Databricks workspace. diff --git a/cmd/bundle/root.go b/cmd/bundle/root.go index a96c0dc1..395ed383 100644 --- a/cmd/bundle/root.go +++ b/cmd/bundle/root.go @@ -8,7 +8,7 @@ import ( // rootCmd represents the root command for the bundle subcommand. var rootCmd = &cobra.Command{ Use: "bundle", - Short: "Databricks Application Bundles", + Short: "Databricks Asset Bundles", } func AddCommand(cmd *cobra.Command) { From 64fcd3d2ee673a74784f896c716d2c8e2bb05ab8 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 27 Jun 2023 01:37:05 +0200 Subject: [PATCH 05/10] Allow specifying repo by path for repos commands (#526) ## Changes The repos commands take a repo ID. It is more convenient to specify repos by their paths. Closes #523. ## Tests New integration tests pass. --- cmd/workspace/repos/overrides.go | 119 ++++++++++++++++++++++++++++- internal/repos_test.go | 124 +++++++++++++++++++++++++++++++ 2 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 internal/repos_test.go diff --git a/cmd/workspace/repos/overrides.go b/cmd/workspace/repos/overrides.go index e3b58857..9f131c03 100644 --- a/cmd/workspace/repos/overrides.go +++ b/cmd/workspace/repos/overrides.go @@ -1,9 +1,126 @@ package repos -import "github.com/databricks/cli/libs/cmdio" +import ( + "context" + "fmt" + "strconv" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/spf13/cobra" +) func init() { listCmd.Annotations["template"] = cmdio.Heredoc(` {{range .}}{{green "%d" .Id}} {{.Path}} {{.Branch|blue}} {{.Url|cyan}} {{end}}`) + + deleteCmd.Use = "delete REPO_ID_OR_PATH" + deleteCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = deleteJson.Unmarshal(&deleteReq) + if err != nil { + return err + } + } else { + deleteReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + err = w.Repos.Delete(ctx, deleteReq) + if err != nil { + return err + } + return nil + } + + getCmd.Use = "get REPO_ID_OR_PATH" + getCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = getJson.Unmarshal(&getReq) + if err != nil { + return err + } + } else { + getReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + + response, err := w.Repos.Get(ctx, getReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + + updateCmd.Use = "update REPO_ID_OR_PATH" + updateCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = updateJson.Unmarshal(&updateReq) + if err != nil { + return err + } + } else { + updateReq.RepoId, err = repoArgumentToRepoID(ctx, w, args) + if err != nil { + return err + } + } + + err = w.Repos.Update(ctx, updateReq) + if err != nil { + return err + } + return nil + } +} + +func repoArgumentToRepoID(ctx context.Context, w *databricks.WorkspaceClient, args []string) (int64, error) { + // ---- Begin copy from cmd/workspace/repos/repos.go ---- + if len(args) == 0 { + promptSpinner := cmdio.Spinner(ctx) + promptSpinner <- "No REPO_ID argument specified. Loading names for Repos drop-down." + names, err := w.Repos.RepoInfoPathToIdMap(ctx, workspace.ListReposRequest{}) + close(promptSpinner) + if err != nil { + return 0, fmt.Errorf("failed to load names for Repos drop-down. Please manually specify required arguments. Original error: %w", err) + } + id, err := cmdio.Select(ctx, names, "The ID for the corresponding repo to access") + if err != nil { + return 0, err + } + args = append(args, id) + } + if len(args) != 1 { + return 0, fmt.Errorf("expected to have the id for the corresponding repo to access") + } + // ---- End copy from cmd/workspace/repos/repos.go ---- + + // If the argument is a repo ID, return it. + arg := args[0] + id, err := strconv.ParseInt(arg, 10, 64) + if err == nil { + return id, nil + } + + // If the argument cannot be parsed as a repo ID, try to look it up by name. + oi, err := w.Workspace.GetStatusByPath(ctx, arg) + if err != nil { + return 0, fmt.Errorf("failed to look up repo by path: %w", err) + } + if oi.ObjectType != workspace.ObjectTypeRepo { + return 0, fmt.Errorf("object at path %q is not a repo", arg) + } + return oi.ObjectId, nil } diff --git a/internal/repos_test.go b/internal/repos_test.go new file mode 100644 index 00000000..957952ca --- /dev/null +++ b/internal/repos_test.go @@ -0,0 +1,124 @@ +package internal + +import ( + "context" + "fmt" + "strconv" + "testing" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func createTemporaryRepo(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) (int64, string) { + me, err := w.CurrentUser.Me(ctx) + require.NoError(t, err) + + repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("empty-repo-integration-")) + repoInfo, err := w.Repos.Create(ctx, workspace.CreateRepo{ + Path: repoPath, + Url: repoUrl, + Provider: "gitHub", + }) + require.NoError(t, err) + + t.Cleanup(func() { + err := w.Repos.DeleteByRepoId(ctx, repoInfo.Id) + if !apierr.IsMissing(err) { + assert.NoError(t, err) + } + }) + + return repoInfo.Id, repoPath +} + +func TestReposGet(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Get by ID + byIdOutput, stderr := RequireSuccessfulRun(t, "repos", "get", strconv.FormatInt(repoId, 10), "--output=json") + assert.Equal(t, "", stderr.String()) + + // Get by path + byPathOutput, stderr := RequireSuccessfulRun(t, "repos", "get", repoPath, "--output=json") + assert.Equal(t, "", stderr.String()) + + // Output should be the same + assert.Equal(t, byIdOutput.String(), byPathOutput.String()) + + // Get by path fails + _, stderr, err = RequireErrorRun(t, "repos", "get", repoPath+"-doesntexist", "--output=json") + assert.ErrorContains(t, err, "failed to look up repo") + + // Get by path resolves to something other than a repo + _, stderr, err = RequireErrorRun(t, "repos", "get", "/Repos", "--output=json") + assert.ErrorContains(t, err, "is not a repo") +} + +func TestReposUpdate(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Update by ID + byIdOutput, stderr := RequireSuccessfulRun(t, "repos", "update", strconv.FormatInt(repoId, 10), "--branch", "ide") + assert.Equal(t, "", stderr.String()) + + // Update by path + byPathOutput, stderr := RequireSuccessfulRun(t, "repos", "update", repoPath, "--branch", "ide") + assert.Equal(t, "", stderr.String()) + + // Output should be the same + assert.Equal(t, byIdOutput.String(), byPathOutput.String()) +} + +func TestReposDeleteByID(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, _ := createTemporaryRepo(t, w, ctx) + + // Delete by ID + stdout, stderr := RequireSuccessfulRun(t, "repos", "delete", strconv.FormatInt(repoId, 10)) + assert.Equal(t, "", stdout.String()) + assert.Equal(t, "", stderr.String()) + + // Check it was actually deleted + _, err = w.Repos.GetByRepoId(ctx, repoId) + assert.True(t, apierr.IsMissing(err), err) +} + +func TestReposDeleteByPath(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + repoId, repoPath := createTemporaryRepo(t, w, ctx) + + // Delete by path + stdout, stderr := RequireSuccessfulRun(t, "repos", "delete", repoPath) + assert.Equal(t, "", stdout.String()) + assert.Equal(t, "", stderr.String()) + + // Check it was actually deleted + _, err = w.Repos.GetByRepoId(ctx, repoId) + assert.True(t, apierr.IsMissing(err), err) +} From c64a3336c5b2d76bbc9de4d1071b928963487676 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 27 Jun 2023 14:17:13 +0200 Subject: [PATCH 06/10] Add provider detection to the repos create command (#528) ## Changes Closes #527. ## Tests Integration tests pass. --- cmd/workspace/repos/overrides.go | 36 ++++++++++++++++ cmd/workspace/repos/provider.go | 30 ++++++++++++++ cmd/workspace/repos/provider_test.go | 21 ++++++++++ internal/repos_test.go | 62 +++++++++++++++++++++++----- 4 files changed, 139 insertions(+), 10 deletions(-) create mode 100644 cmd/workspace/repos/provider.go create mode 100644 cmd/workspace/repos/provider_test.go diff --git a/cmd/workspace/repos/overrides.go b/cmd/workspace/repos/overrides.go index 9f131c03..44d9dca4 100644 --- a/cmd/workspace/repos/overrides.go +++ b/cmd/workspace/repos/overrides.go @@ -17,6 +17,42 @@ func init() { {{range .}}{{green "%d" .Id}} {{.Path}} {{.Branch|blue}} {{.Url|cyan}} {{end}}`) + createCmd.Use = "create URL [PROVIDER]" + createCmd.Args = func(cmd *cobra.Command, args []string) error { + // If the provider argument is not specified, we try to detect it from the URL. + check := cobra.RangeArgs(1, 2) + if cmd.Flags().Changed("json") { + check = cobra.ExactArgs(0) + } + return check(cmd, args) + } + createCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { + ctx := cmd.Context() + w := root.WorkspaceClient(ctx) + if cmd.Flags().Changed("json") { + err = createJson.Unmarshal(&createReq) + if err != nil { + return err + } + } else { + createReq.Url = args[0] + if len(args) > 1 { + createReq.Provider = args[1] + } else { + createReq.Provider = DetectProvider(createReq.Url) + if createReq.Provider == "" { + return fmt.Errorf( + "could not detect provider from URL %q; please specify", createReq.Url) + } + } + } + response, err := w.Repos.Create(ctx, createReq) + if err != nil { + return err + } + return cmdio.Render(ctx, response) + } + deleteCmd.Use = "delete REPO_ID_OR_PATH" deleteCmd.RunE = func(cmd *cobra.Command, args []string) (err error) { ctx := cmd.Context() diff --git a/cmd/workspace/repos/provider.go b/cmd/workspace/repos/provider.go new file mode 100644 index 00000000..9e407e2b --- /dev/null +++ b/cmd/workspace/repos/provider.go @@ -0,0 +1,30 @@ +package repos + +import ( + "net/url" + "regexp" + "strings" +) + +var gitProviders = map[string]string{ + "github.com": "gitHub", + "dev.azure.com": "azureDevOpsServices", + "gitlab.com": "gitLab", + "bitbucket.org": "bitbucketCloud", +} + +var awsCodeCommitRegexp = regexp.MustCompile(`^git-codecommit\.[^.]+\.amazonaws.com$`) + +func DetectProvider(rawURL string) string { + provider := "" + u, err := url.Parse(rawURL) + if err != nil { + return provider + } + if v, ok := gitProviders[strings.ToLower(u.Host)]; ok { + provider = v + } else if awsCodeCommitRegexp.MatchString(u.Host) { + provider = "awsCodeCommit" + } + return provider +} diff --git a/cmd/workspace/repos/provider_test.go b/cmd/workspace/repos/provider_test.go new file mode 100644 index 00000000..e719be2b --- /dev/null +++ b/cmd/workspace/repos/provider_test.go @@ -0,0 +1,21 @@ +package repos + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectProvider(t *testing.T) { + for url, provider := range map[string]string{ + "https://user@bitbucket.org/user/repo.git": "bitbucketCloud", + "https://github.com//user/repo.git": "gitHub", + "https://user@dev.azure.com/user/project/_git/repo": "azureDevOpsServices", + "https://abc/user/repo.git": "", + "ewfgwergfwe": "", + "https://foo@@bar": "", + "https://git-codecommit.us-east-2.amazonaws.com/v1/repos/MyDemoRepo": "awsCodeCommit", + } { + assert.Equal(t, provider, DetectProvider(url)) + } +} diff --git a/internal/repos_test.go b/internal/repos_test.go index 957952ca..340de334 100644 --- a/internal/repos_test.go +++ b/internal/repos_test.go @@ -13,28 +13,70 @@ import ( "github.com/stretchr/testify/require" ) -func createTemporaryRepo(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) (int64, string) { +func synthesizeTemporaryRepoPath(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) string { me, err := w.CurrentUser.Me(ctx) require.NoError(t, err) - repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("empty-repo-integration-")) + + // Cleanup if repo was created at specified path. + t.Cleanup(func() { + oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + if apierr.IsMissing(err) { + return + } + require.NoError(t, err) + err = w.Repos.DeleteByRepoId(ctx, oi.ObjectId) + require.NoError(t, err) + }) + + return repoPath +} + +func createTemporaryRepo(t *testing.T, w *databricks.WorkspaceClient, ctx context.Context) (int64, string) { + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) repoInfo, err := w.Repos.Create(ctx, workspace.CreateRepo{ Path: repoPath, Url: repoUrl, Provider: "gitHub", }) require.NoError(t, err) - - t.Cleanup(func() { - err := w.Repos.DeleteByRepoId(ctx, repoInfo.Id) - if !apierr.IsMissing(err) { - assert.NoError(t, err) - } - }) - return repoInfo.Id, repoPath } +func TestReposCreateWithProvider(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) + + _, stderr := RequireSuccessfulRun(t, "repos", "create", repoUrl, "gitHub", "--path", repoPath) + assert.Equal(t, "", stderr.String()) + + // Confirm the repo was created. + oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + assert.NoError(t, err) + assert.Equal(t, workspace.ObjectTypeRepo, oi.ObjectType) +} + +func TestReposCreateWithoutProvider(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + repoPath := synthesizeTemporaryRepoPath(t, w, ctx) + + _, stderr := RequireSuccessfulRun(t, "repos", "create", repoUrl, "--path", repoPath) + assert.Equal(t, "", stderr.String()) + + // Confirm the repo was created. + oi, err := w.Workspace.GetStatusByPath(ctx, repoPath) + assert.NoError(t, err) + assert.Equal(t, workspace.ObjectTypeRepo, oi.ObjectType) +} + func TestReposGet(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) From 1f5cdfda291f444bb8727003a2c9295aed263f31 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 27 Jun 2023 14:42:27 +0200 Subject: [PATCH 07/10] Add dbfs scheme prefix to paths in cp command output (#516) ## Changes Adds the dbfs:/ prefix to paths output by the cp command so they can be used with the CLI ## Tests Manually Currently there are no integration tests for command output, I'll add them in separately --------- Co-authored-by: Pieter Noordhuis --- cmd/fs/cp.go | 59 ++++++++++++++++++++++++++++++++++++----------- cmd/fs/helpers.go | 4 ++++ 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/cmd/fs/cp.go b/cmd/fs/cp.go index 7d9ff24a..399a1aea 100644 --- a/cmd/fs/cp.go +++ b/cmd/fs/cp.go @@ -15,9 +15,11 @@ import ( ) type copy struct { - ctx context.Context - sourceFiler filer.Filer - targetFiler filer.Filer + ctx context.Context + sourceFiler filer.Filer + targetFiler filer.Filer + sourceScheme string + targetScheme string } func (c *copy) cpWriteCallback(sourceDir, targetDir string) fs.WalkDirFunc { @@ -78,29 +80,47 @@ func (c *copy) cpFileToFile(sourcePath, targetPath string) error { err = c.targetFiler.Write(c.ctx, targetPath, r) // skip if file already exists if err != nil && errors.Is(err, fs.ErrExist) { - return emitCpFileSkippedEvent(c.ctx, sourcePath, targetPath) + return c.emitFileSkippedEvent(sourcePath, targetPath) } if err != nil { return err } } - return emitCpFileCopiedEvent(c.ctx, sourcePath, targetPath) + return c.emitFileCopiedEvent(sourcePath, targetPath) } // TODO: emit these events on stderr // TODO: add integration tests for these events -func emitCpFileSkippedEvent(ctx context.Context, sourcePath, targetPath string) error { - event := newFileSkippedEvent(sourcePath, targetPath) +func (c *copy) emitFileSkippedEvent(sourcePath, targetPath string) error { + fullSourcePath := sourcePath + if c.sourceScheme != "" { + fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) + } + fullTargetPath := targetPath + if c.targetScheme != "" { + fullTargetPath = path.Join(c.targetScheme+":", targetPath) + } + + event := newFileSkippedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}} (skipped; already exists)\n" - return cmdio.RenderWithTemplate(ctx, event, template) + return cmdio.RenderWithTemplate(c.ctx, event, template) } -func emitCpFileCopiedEvent(ctx context.Context, sourcePath, targetPath string) error { - event := newFileCopiedEvent(sourcePath, targetPath) +func (c *copy) emitFileCopiedEvent(sourcePath, targetPath string) error { + fullSourcePath := sourcePath + if c.sourceScheme != "" { + fullSourcePath = path.Join(c.sourceScheme+":", sourcePath) + } + fullTargetPath := targetPath + if c.targetScheme != "" { + fullTargetPath = path.Join(c.targetScheme+":", targetPath) + } + + event := newFileCopiedEvent(fullSourcePath, fullTargetPath) template := "{{.SourcePath}} -> {{.TargetPath}}\n" - return cmdio.RenderWithTemplate(ctx, event, template) + return cmdio.RenderWithTemplate(c.ctx, event, template) } var cpOverwrite bool @@ -144,10 +164,21 @@ var cpCmd = &cobra.Command{ return err } + sourceScheme := "" + if isDbfsPath(fullSourcePath) { + sourceScheme = "dbfs" + } + targetScheme := "" + if isDbfsPath(fullTargetPath) { + targetScheme = "dbfs" + } + c := copy{ - ctx: ctx, - sourceFiler: sourceFiler, - targetFiler: targetFiler, + ctx: ctx, + sourceFiler: sourceFiler, + targetFiler: targetFiler, + sourceScheme: sourceScheme, + targetScheme: targetScheme, } // Get information about file at source path diff --git a/cmd/fs/helpers.go b/cmd/fs/helpers.go index ca85cf1b..43d65b5d 100644 --- a/cmd/fs/helpers.go +++ b/cmd/fs/helpers.go @@ -45,3 +45,7 @@ func filerForPath(ctx context.Context, fullPath string) (filer.Filer, string, er f, err := filer.NewDbfsClient(w, "/") return f, path, err } + +func isDbfsPath(path string) bool { + return strings.HasPrefix(path, "dbfs:/") +} From 54148ffedf7b4f2d98654efcc225ddcec3f23063 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 27 Jun 2023 16:06:56 +0200 Subject: [PATCH 08/10] Add --absolute flag for ls command (#508) tested manually --------- Co-authored-by: Pieter Noordhuis --- cmd/fs/ls.go | 14 +++++++++++--- internal/fs_ls_test.go | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/cmd/fs/ls.go b/cmd/fs/ls.go index 1226e937..b06345d5 100644 --- a/cmd/fs/ls.go +++ b/cmd/fs/ls.go @@ -2,6 +2,7 @@ package fs import ( "io/fs" + "path" "sort" "time" @@ -17,14 +18,19 @@ type jsonDirEntry struct { ModTime time.Time `json:"last_modified"` } -func toJsonDirEntry(f fs.DirEntry) (*jsonDirEntry, error) { +func toJsonDirEntry(f fs.DirEntry, baseDir string, isAbsolute bool) (*jsonDirEntry, error) { info, err := f.Info() if err != nil { return nil, err } + name := f.Name() + if isAbsolute { + name = path.Join(baseDir, name) + } + return &jsonDirEntry{ - Name: f.Name(), + Name: name, IsDir: f.IsDir(), Size: info.Size(), ModTime: info.ModTime(), @@ -54,7 +60,7 @@ var lsCmd = &cobra.Command{ jsonDirEntries := make([]jsonDirEntry, len(entries)) for i, entry := range entries { - jsonDirEntry, err := toJsonDirEntry(entry) + jsonDirEntry, err := toJsonDirEntry(entry, args[0], lsAbsolute) if err != nil { return err } @@ -79,8 +85,10 @@ var lsCmd = &cobra.Command{ } var longMode bool +var lsAbsolute bool func init() { lsCmd.Flags().BoolVarP(&longMode, "long", "l", false, "Displays full information including size, file type and modification time since Epoch in milliseconds.") + lsCmd.Flags().BoolVar(&lsAbsolute, "absolute", false, "Displays absolute paths.") fsCmd.AddCommand(lsCmd) } diff --git a/internal/fs_ls_test.go b/internal/fs_ls_test.go index 060c706d..885fc31f 100644 --- a/internal/fs_ls_test.go +++ b/internal/fs_ls_test.go @@ -42,6 +42,7 @@ func TestFsLsForDbfs(t *testing.T) { require.NoError(t, err) // assert on ls output + assert.Len(t, parsedStdout, 2) assert.Equal(t, "a", parsedStdout[0]["name"]) assert.Equal(t, true, parsedStdout[0]["is_directory"]) assert.Equal(t, float64(0), parsedStdout[0]["size"]) @@ -50,6 +51,42 @@ func TestFsLsForDbfs(t *testing.T) { assert.Equal(t, float64(3), parsedStdout[1]["size"]) } +func TestFsLsForDbfsWithAbsolutePaths(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + + tmpDir := temporaryDbfsDir(t, w) + + f, err := filer.NewDbfsClient(w, tmpDir) + require.NoError(t, err) + + err = f.Mkdir(ctx, "a") + require.NoError(t, err) + err = f.Write(ctx, "a/hello.txt", strings.NewReader("abc"), filer.CreateParentDirectories) + require.NoError(t, err) + err = f.Write(ctx, "bye.txt", strings.NewReader("def")) + require.NoError(t, err) + + stdout, stderr := RequireSuccessfulRun(t, "fs", "ls", "dbfs:"+tmpDir, "--output=json", "--absolute") + assert.Equal(t, "", stderr.String()) + var parsedStdout []map[string]any + err = json.Unmarshal(stdout.Bytes(), &parsedStdout) + require.NoError(t, err) + + // assert on ls output + assert.Len(t, parsedStdout, 2) + assert.Equal(t, path.Join("dbfs:", tmpDir, "a"), parsedStdout[0]["name"]) + assert.Equal(t, true, parsedStdout[0]["is_directory"]) + assert.Equal(t, float64(0), parsedStdout[0]["size"]) + + assert.Equal(t, path.Join("dbfs:", tmpDir, "bye.txt"), parsedStdout[1]["name"]) + assert.Equal(t, false, parsedStdout[1]["is_directory"]) + assert.Equal(t, float64(3), parsedStdout[1]["size"]) +} + func TestFsLsForDbfsOnFile(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) From f64c44285f2d77cd8a11f3feef6aae3eb59ceefd Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Tue, 27 Jun 2023 20:42:29 +0200 Subject: [PATCH 09/10] Decode contents by default in workspace export command (#531) ## Changes Also see #525. The direct download flag has been removed in newer versions because of the content type issue. Instead, we can make the command decode the base64 output when the output mode is text. ``` $ databricks workspace export /some/path/script.sh #!/bin/bash echo "this is a script" ``` ## Tests New integration test. --- cmd/workspace/workspace/overrides.go | 3 +++ internal/workspace_test.go | 21 +++++++++++++++++++++ libs/cmdio/render.go | 23 +++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/cmd/workspace/workspace/overrides.go b/cmd/workspace/workspace/overrides.go index 0a00ba25..e1b97c59 100644 --- a/cmd/workspace/workspace/overrides.go +++ b/cmd/workspace/workspace/overrides.go @@ -8,4 +8,7 @@ func init() { {{header "ID"}} {{header "Type"}} {{header "Language"}} {{header "Path"}} {{range .}}{{green "%d" .ObjectId}} {{blue "%s" .ObjectType}} {{cyan "%s" .Language}} {{.Path|cyan}} {{end}}`) + + // The export command prints the contents of the file to stdout by default. + exportCmd.Annotations["template"] = `{{.Content | b64_decode}}` } diff --git a/internal/workspace_test.go b/internal/workspace_test.go index 3aafffe4..dd26bcf4 100644 --- a/internal/workspace_test.go +++ b/internal/workspace_test.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "os" + "path" "path/filepath" "strings" "testing" @@ -39,6 +40,26 @@ func TestWorkpaceGetStatusErrorWhenNoArguments(t *testing.T) { assert.Equal(t, "accepts 1 arg(s), received 0", err.Error()) } +func TestWorkpaceExportPrintsContents(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w := databricks.Must(databricks.NewWorkspaceClient()) + tmpdir := temporaryWorkspaceDir(t, w) + f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + require.NoError(t, err) + + // Write file to workspace + contents := "#!/usr/bin/bash\necho hello, world\n" + err = f.Write(ctx, "file-a", strings.NewReader(contents)) + require.NoError(t, err) + + // Run export + stdout, stderr := RequireSuccessfulRun(t, "workspace", "export", path.Join(tmpdir, "file-a")) + assert.Equal(t, contents, stdout.String()) + assert.Equal(t, "", stderr.String()) +} + func setupWorkspaceImportExportTest(t *testing.T) (context.Context, filer.Filer, string) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) diff --git a/libs/cmdio/render.go b/libs/cmdio/render.go index 7b3da190..d641f61d 100644 --- a/libs/cmdio/render.go +++ b/libs/cmdio/render.go @@ -1,6 +1,8 @@ package cmdio import ( + "bytes" + "encoding/base64" "encoding/json" "io" "strings" @@ -93,6 +95,27 @@ func renderTemplate(w io.Writer, tmpl string, v any) error { "pretty_date": func(t time.Time) string { return t.Format("2006-01-02T15:04:05Z") }, + "b64_encode": func(in string) (string, error) { + var out bytes.Buffer + enc := base64.NewEncoder(base64.StdEncoding, &out) + _, err := enc.Write([]byte(in)) + if err != nil { + return "", err + } + err = enc.Close() + if err != nil { + return "", err + } + return out.String(), nil + }, + "b64_decode": func(in string) (string, error) { + dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(in)) + out, err := io.ReadAll(dec) + if err != nil { + return "", err + } + return string(out), nil + }, }).Parse(tmpl) if err != nil { return err From f42279fe47b614910de7df8e786fca407fb4fa71 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 28 Jun 2023 16:23:55 +0200 Subject: [PATCH 10/10] Release v0.200.1 (#532) ## Changes CLI: * Add --absolute flag for ls command ([#508](https://github.com/databricks/cli/pull/508)). * Add dbfs scheme prefix to paths in cp command output ([#516](https://github.com/databricks/cli/pull/516)). * Add provider detection to the repos create command ([#528](https://github.com/databricks/cli/pull/528)). * Added configure-cluster flag for auth login ([#500](https://github.com/databricks/cli/pull/500)). * Added prompts for Databricks profile for auth login command ([#502](https://github.com/databricks/cli/pull/502)). * Allow specifying repo by path for repos commands ([#526](https://github.com/databricks/cli/pull/526)). * Decode contents by default in workspace export command ([#531](https://github.com/databricks/cli/pull/531)). * Fixed jobs create command to only accept JSON payload ([#498](https://github.com/databricks/cli/pull/498)). * Make local files default for fs commands ([#506](https://github.com/databricks/cli/pull/506)). * Remove \r from new line print statments ([#509](https://github.com/databricks/cli/pull/509)). * Remove extra call to filer.Stat in dbfs filer.Read ([#515](https://github.com/databricks/cli/pull/515)). * Update alerts command integration test ([#512](https://github.com/databricks/cli/pull/512)). * Update variable regex to support hyphens ([#503](https://github.com/databricks/cli/pull/503)). Bundles: * Add DATABRICKS_BUNDLE_TMP env variable ([#462](https://github.com/databricks/cli/pull/462)). * Update Terraform provider schema structs ([#504](https://github.com/databricks/cli/pull/504)). Dependencies: * Bump github.com/databricks/databricks-sdk-go from 0.9.1-0.20230614092458-b5bbc1c8dabb to 0.10.0 ([#497](https://github.com/databricks/cli/pull/497)). Internal: * Use direct download for workspace filer read ([#514](https://github.com/databricks/cli/pull/514)). --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7675f7c..2eae4ac1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Version changelog +## 0.200.1 + +CLI: +* Add --absolute flag for ls command ([#508](https://github.com/databricks/cli/pull/508)). +* Add dbfs scheme prefix to paths in cp command output ([#516](https://github.com/databricks/cli/pull/516)). +* Add provider detection to the repos create command ([#528](https://github.com/databricks/cli/pull/528)). +* Added configure-cluster flag for auth login ([#500](https://github.com/databricks/cli/pull/500)). +* Added prompts for Databricks profile for auth login command ([#502](https://github.com/databricks/cli/pull/502)). +* Allow specifying repo by path for repos commands ([#526](https://github.com/databricks/cli/pull/526)). +* Decode contents by default in workspace export command ([#531](https://github.com/databricks/cli/pull/531)). +* Fixed jobs create command to only accept JSON payload ([#498](https://github.com/databricks/cli/pull/498)). +* Make local files default for fs commands ([#506](https://github.com/databricks/cli/pull/506)). +* Remove \r from new line print statments ([#509](https://github.com/databricks/cli/pull/509)). +* Remove extra call to filer.Stat in dbfs filer.Read ([#515](https://github.com/databricks/cli/pull/515)). +* Update alerts command integration test ([#512](https://github.com/databricks/cli/pull/512)). +* Update variable regex to support hyphens ([#503](https://github.com/databricks/cli/pull/503)). + +Bundles: +* Add DATABRICKS_BUNDLE_TMP env variable ([#462](https://github.com/databricks/cli/pull/462)). +* Update Terraform provider schema structs ([#504](https://github.com/databricks/cli/pull/504)). + +Dependencies: +* Bump github.com/databricks/databricks-sdk-go from 0.9.1-0.20230614092458-b5bbc1c8dabb to 0.10.0 ([#497](https://github.com/databricks/cli/pull/497)). + +Internal: +* Use direct download for workspace filer read ([#514](https://github.com/databricks/cli/pull/514)). + ## 0.200.0 This version marks the first version available as public preview.