From 27df4e765c02ef967abc6e1ae2bda2ac3359477e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 31 May 2023 11:24:20 +0000 Subject: [PATCH 1/5] Implement DBFS filer (#139) Adds a DBFS implementation of the `filer.Filer` interface. The integration tests are reused between the workspace filesystem and DBFS implementations to ensure identical behavior. --- internal/filer_test.go | 225 +++++++++++++++++---------- libs/filer/dbfs_client.go | 199 +++++++++++++++++++++++ libs/filer/filer.go | 8 + libs/filer/workspace_files_client.go | 42 ++++- 4 files changed, 385 insertions(+), 89 deletions(-) create mode 100644 libs/filer/dbfs_client.go diff --git a/internal/filer_test.go b/internal/filer_test.go index 45227b225..6244a5c47 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -1,6 +1,7 @@ package internal import ( + "bytes" "context" "errors" "fmt" @@ -12,6 +13,7 @@ import ( "github.com/databricks/cli/libs/filer" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/files" "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -28,12 +30,98 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str return } - body, err := io.ReadAll(reader) + var body bytes.Buffer + _, err = io.Copy(&body, reader) if !assert.NoError(f, err) { return } - assert.Equal(f, contents, string(body)) + assert.Equal(f, contents, body.String()) +} + +func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { + var err error + + // Write should fail because the root path doesn't yet exist. + err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`)) + assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{})) + + // Read should fail because the root path doesn't yet exist. + _, err = f.Read(ctx, "/foo/bar") + assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) + + // Write with CreateParentDirectories flag should succeed. + err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories) + assert.NoError(t, err) + filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello world`) + + // Write should fail because there is an existing file at the specified path. + err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`)) + assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{})) + + // Write with OverwriteIfExists should succeed. + err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists) + assert.NoError(t, err) + filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`) + + // Delete should fail if the file doesn't exist. + err = f.Delete(ctx, "/doesnt_exist") + assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) + + // Delete should succeed for file that does exist. + err = f.Delete(ctx, "/foo/bar") + assert.NoError(t, err) +} + +func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { + var err error + + // We start with an empty directory. + entries, err := f.ReadDir(ctx, ".") + require.NoError(t, err) + assert.Len(t, entries, 0) + + // Write a file. + err = f.Write(ctx, "/hello.txt", strings.NewReader(`hello world`)) + require.NoError(t, err) + + // Create a directory. + err = f.Mkdir(ctx, "/dir") + require.NoError(t, err) + + // Write a file. + err = f.Write(ctx, "/dir/world.txt", strings.NewReader(`hello world`)) + require.NoError(t, err) + + // Create a nested directory (check that it creates intermediate directories). + err = f.Mkdir(ctx, "/dir/a/b/c") + require.NoError(t, err) + + // Expect an error if the path doesn't exist. + _, err = f.ReadDir(ctx, "/dir/a/b/c/d/e") + assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}), err) + + // Expect two entries in the root. + entries, err = f.ReadDir(ctx, ".") + require.NoError(t, err) + assert.Len(t, entries, 2) + assert.Equal(t, "dir", entries[0].Name) + assert.Equal(t, "hello.txt", entries[1].Name) + assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) + + // Expect two entries in the directory. + entries, err = f.ReadDir(ctx, "/dir") + require.NoError(t, err) + assert.Len(t, entries, 2) + assert.Equal(t, "a", entries[0].Name) + assert.Equal(t, "world.txt", entries[1].Name) + assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) + + // Expect a single entry in the nested path. + entries, err = f.ReadDir(ctx, "/dir/a/b") + require.NoError(t, err) + assert.Len(t, entries, 1) + assert.Equal(t, "c", entries[0].Name) } func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { @@ -41,7 +129,7 @@ func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { me, err := w.CurrentUser.Me(ctx) require.NoError(t, err) - path := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("wsfs-files-")) + path := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("integration-test-filer-wsfs-")) // Ensure directory exists, but doesn't exist YET! // Otherwise we could inadvertently remove a directory that already exists on cleanup. @@ -59,7 +147,7 @@ func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { if err == nil || apierr.IsMissing(err) { return } - t.Logf("unable to remove temporary workspace path %s: %#v", path, err) + t.Logf("unable to remove temporary workspace directory %s: %#v", path, err) }) return path @@ -85,90 +173,57 @@ func setupWorkspaceFilesTest(t *testing.T) (context.Context, filer.Filer) { } func TestAccFilerWorkspaceFilesReadWrite(t *testing.T) { - var err error - ctx, f := setupWorkspaceFilesTest(t) - - // Write should fail because the root path doesn't yet exist. - err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`)) - assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{})) - - // Read should fail because the root path doesn't yet exist. - _, err = f.Read(ctx, "/foo/bar") - assert.True(t, apierr.IsMissing(err)) - - // Write with CreateParentDirectories flag should succeed. - err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories) - assert.NoError(t, err) - filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello world`) - - // Write should fail because there is an existing file at the specified path. - err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`)) - assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{})) - - // Write with OverwriteIfExists should succeed. - err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists) - assert.NoError(t, err) - filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`) - - // Delete should fail if the file doesn't exist. - err = f.Delete(ctx, "/doesnt_exist") - assert.True(t, apierr.IsMissing(err)) - - // Delete should succeed for file that does exist. - err = f.Delete(ctx, "/foo/bar") - assert.NoError(t, err) + runFilerReadWriteTest(t, ctx, f) } func TestAccFilerWorkspaceFilesReadDir(t *testing.T) { - var err error - ctx, f := setupWorkspaceFilesTest(t) - - // We start with an empty directory. - entries, err := f.ReadDir(ctx, ".") - require.NoError(t, err) - assert.Len(t, entries, 0) - - // Write a file. - err = f.Write(ctx, "/hello.txt", strings.NewReader(`hello world`)) - require.NoError(t, err) - - // Create a directory. - err = f.Mkdir(ctx, "/dir") - require.NoError(t, err) - - // Write a file. - err = f.Write(ctx, "/dir/world.txt", strings.NewReader(`hello world`)) - require.NoError(t, err) - - // Create a nested directory (check that it creates intermediate directories). - err = f.Mkdir(ctx, "/dir/a/b/c") - require.NoError(t, err) - - // Expect an error if the path doesn't exist. - _, err = f.ReadDir(ctx, "/dir/a/b/c/d/e") - assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{})) - - // Expect two entries in the root. - entries, err = f.ReadDir(ctx, ".") - require.NoError(t, err) - assert.Len(t, entries, 2) - assert.Equal(t, "dir", entries[0].Name) - assert.Equal(t, "hello.txt", entries[1].Name) - assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) - - // Expect two entries in the directory. - entries, err = f.ReadDir(ctx, "/dir") - require.NoError(t, err) - assert.Len(t, entries, 2) - assert.Equal(t, "a", entries[0].Name) - assert.Equal(t, "world.txt", entries[1].Name) - assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) - - // Expect a single entry in the nested path. - entries, err = f.ReadDir(ctx, "/dir/a/b") - require.NoError(t, err) - assert.Len(t, entries, 1) - assert.Equal(t, "c", entries[0].Name) + runFilerReadDirTest(t, ctx, f) +} + +func temporaryDbfsDir(t *testing.T, w *databricks.WorkspaceClient) string { + ctx := context.Background() + path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-filer-dbfs-")) + + // This call fails if the path already exists. + t.Logf("mkdir dbfs:%s", path) + err := w.Dbfs.MkdirsByPath(ctx, path) + require.NoError(t, err) + + // Remove test directory on test completion. + t.Cleanup(func() { + t.Logf("rm -rf dbfs:%s", path) + err := w.Dbfs.Delete(ctx, files.Delete{ + Path: path, + Recursive: true, + }) + if err == nil || apierr.IsMissing(err) { + return + } + t.Logf("unable to remove temporary dbfs directory %s: %#v", path, err) + }) + + return path +} + +func setupFilerDbfsTest(t *testing.T) (context.Context, filer.Filer) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + ctx := context.Background() + w := databricks.Must(databricks.NewWorkspaceClient()) + tmpdir := temporaryDbfsDir(t, w) + f, err := filer.NewDbfsClient(w, tmpdir) + require.NoError(t, err) + return ctx, f +} + +func TestAccFilerDbfsReadWrite(t *testing.T) { + ctx, f := setupFilerDbfsTest(t) + runFilerReadWriteTest(t, ctx, f) +} + +func TestAccFilerDbfsReadDir(t *testing.T) { + ctx, f := setupFilerDbfsTest(t) + runFilerReadDirTest(t, ctx, f) } diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go new file mode 100644 index 000000000..2e3242107 --- /dev/null +++ b/libs/filer/dbfs_client.go @@ -0,0 +1,199 @@ +package filer + +import ( + "context" + "errors" + "io" + "net/http" + "path" + "sort" + "time" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/files" + "golang.org/x/exp/slices" +) + +// DbfsClient implements the [Filer] interface for the DBFS backend. +type DbfsClient struct { + workspaceClient *databricks.WorkspaceClient + + // File operations will be relative to this path. + root RootPath +} + +func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) { + return &DbfsClient{ + workspaceClient: w, + + root: NewRootPath(root), + }, nil +} + +func (w *DbfsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { + absPath, err := w.root.Join(name) + if err != nil { + return err + } + + fileMode := files.FileModeWrite + if slices.Contains(mode, OverwriteIfExists) { + fileMode |= files.FileModeOverwrite + } + + // Issue info call before write because it automatically creates parent directories. + // + // For discussion: we could decide this is actually convenient, remove the call below, + // and apply the same semantics for the WSFS filer. + // + if !slices.Contains(mode, CreateParentDirectories) { + _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, path.Dir(absPath)) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return NoSuchDirectoryError{path.Dir(absPath)} + } + } + + return err + } + } + + handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, fileMode) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return err + } + + // This API returns a 400 if the file already exists. + if aerr.StatusCode == http.StatusBadRequest { + if aerr.ErrorCode == "RESOURCE_ALREADY_EXISTS" { + return FileAlreadyExistsError{absPath} + } + } + + return err + } + + _, err = io.Copy(handle, reader) + cerr := handle.Close() + if err == nil { + err = cerr + } + + return err +} + +func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { + absPath, err := w.root.Join(name) + if err != nil { + return nil, err + } + + handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return nil, FileDoesNotExistError{absPath} + } + } + + return nil, err + } + + return handle, nil +} + +func (w *DbfsClient) Delete(ctx context.Context, name string) error { + absPath, err := w.root.Join(name) + if err != nil { + return err + } + + // Issue info call before delete because delete succeeds if the specified path doesn't exist. + // + // For discussion: we could decide this is actually convenient, remove the call below, + // and apply the same semantics for the WSFS filer. + // + _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return FileDoesNotExistError{absPath} + } + } + + return err + } + + return w.workspaceClient.Dbfs.Delete(ctx, files.Delete{ + Path: absPath, + Recursive: false, + }) +} + +func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { + absPath, err := w.root.Join(name) + if err != nil { + return nil, err + } + + res, err := w.workspaceClient.Dbfs.ListByPath(ctx, absPath) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return nil, NoSuchDirectoryError{absPath} + } + } + + return nil, err + } + + info := make([]FileInfo, len(res.Files)) + for i, v := range res.Files { + info[i] = FileInfo{ + Name: path.Base(v.Path), + Size: v.FileSize, + ModTime: time.UnixMilli(v.ModificationTime), + } + } + + // Sort by name for parity with os.ReadDir. + sort.Slice(info, func(i, j int) bool { return info[i].Name < info[j].Name }) + return info, nil +} + +func (w *DbfsClient) Mkdir(ctx context.Context, name string) error { + dirPath, err := w.root.Join(name) + if err != nil { + return err + } + + return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath) +} diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 841d1b831..88de7e466 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -38,6 +38,14 @@ func (err FileAlreadyExistsError) Error() string { return fmt.Sprintf("file already exists: %s", err.path) } +type FileDoesNotExistError struct { + path string +} + +func (err FileDoesNotExistError) Error() string { + return fmt.Sprintf("file does not exist: %s", err.path) +} + type NoSuchDirectoryError struct { path string } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 33fefc3d2..df2c0bdbb 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -68,7 +68,12 @@ func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io err = w.apiClient.Do(ctx, http.MethodPost, urlPath, body, nil) - // If we got an API error we deal with it below. + // Return early on success. + if err == nil { + return nil + } + + // Special handling of this error only if it is an API error. var aerr *apierr.APIError if !errors.As(err, &aerr) { return err @@ -112,11 +117,23 @@ func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader var res []byte err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res) - if err != nil { + + // Return early on success. + if err == nil { + return bytes.NewReader(res), nil + } + + // Special handling of this error only if it is an API error. + var aerr *apierr.APIError + if !errors.As(err, &aerr) { return nil, err } - return bytes.NewReader(res), nil + if aerr.StatusCode == http.StatusNotFound { + return nil, FileDoesNotExistError{absPath} + } + + return nil, err } func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error { @@ -125,10 +142,27 @@ func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error { return err } - return w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{ + err = w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{ Path: absPath, Recursive: false, }) + + // Return early on success. + if err == nil { + return nil + } + + // Special handling of this error only if it is an API error. + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return err + } + + if aerr.StatusCode == http.StatusNotFound { + return FileDoesNotExistError{absPath} + } + + return err } func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { From 42cd8daee0d864141339cc4d1f93ce7a3783099f Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 31 May 2023 12:22:26 +0000 Subject: [PATCH 2/5] Make filer.Filer return fs.DirEntry from ReadDir (#415) ## Changes This allows for compatibility with the stdlib functions in io/fs. ## Tests Integration tests pass. --- internal/filer_test.go | 25 ++++++++---- libs/filer/dbfs_client.go | 59 +++++++++++++++++++++++---- libs/filer/filer.go | 20 +-------- libs/filer/workspace_files_client.go | 61 ++++++++++++++++++++++++---- 4 files changed, 123 insertions(+), 42 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 6244a5c47..29d08b3eb 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "io/fs" "net/http" "strings" "testing" @@ -75,6 +76,7 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { var err error + var info fs.FileInfo // We start with an empty directory. entries, err := f.ReadDir(ctx, ".") @@ -105,23 +107,32 @@ func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { entries, err = f.ReadDir(ctx, ".") require.NoError(t, err) assert.Len(t, entries, 2) - assert.Equal(t, "dir", entries[0].Name) - assert.Equal(t, "hello.txt", entries[1].Name) - assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) + assert.Equal(t, "dir", entries[0].Name()) + assert.True(t, entries[0].IsDir()) + assert.Equal(t, "hello.txt", entries[1].Name()) + assert.False(t, entries[1].IsDir()) + info, err = entries[1].Info() + require.NoError(t, err) + assert.Greater(t, info.ModTime().Unix(), int64(0)) // Expect two entries in the directory. entries, err = f.ReadDir(ctx, "/dir") require.NoError(t, err) assert.Len(t, entries, 2) - assert.Equal(t, "a", entries[0].Name) - assert.Equal(t, "world.txt", entries[1].Name) - assert.Greater(t, entries[1].ModTime.Unix(), int64(0)) + assert.Equal(t, "a", entries[0].Name()) + assert.True(t, entries[0].IsDir()) + assert.Equal(t, "world.txt", entries[1].Name()) + assert.False(t, entries[1].IsDir()) + info, err = entries[1].Info() + require.NoError(t, err) + assert.Greater(t, info.ModTime().Unix(), int64(0)) // Expect a single entry in the nested path. entries, err = f.ReadDir(ctx, "/dir/a/b") require.NoError(t, err) assert.Len(t, entries, 1) - assert.Equal(t, "c", entries[0].Name) + assert.Equal(t, "c", entries[0].Name()) + assert.True(t, entries[0].IsDir()) } func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 2e3242107..7a3084ac6 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -4,6 +4,7 @@ import ( "context" "errors" "io" + "io/fs" "net/http" "path" "sort" @@ -15,6 +16,52 @@ import ( "golang.org/x/exp/slices" ) +// Type that implements fs.DirEntry for DBFS. +type dbfsDirEntry struct { + dbfsFileInfo +} + +func (entry dbfsDirEntry) Type() fs.FileMode { + typ := fs.ModePerm + if entry.fi.IsDir { + typ |= fs.ModeDir + } + return typ +} + +func (entry dbfsDirEntry) Info() (fs.FileInfo, error) { + return entry.dbfsFileInfo, nil +} + +// Type that implements fs.FileInfo for DBFS. +type dbfsFileInfo struct { + fi files.FileInfo +} + +func (info dbfsFileInfo) Name() string { + return path.Base(info.fi.Path) +} + +func (info dbfsFileInfo) Size() int64 { + return info.fi.FileSize +} + +func (info dbfsFileInfo) Mode() fs.FileMode { + return fs.ModePerm +} + +func (info dbfsFileInfo) ModTime() time.Time { + return time.UnixMilli(info.fi.ModificationTime) +} + +func (info dbfsFileInfo) IsDir() bool { + return info.fi.IsDir +} + +func (info dbfsFileInfo) Sys() any { + return nil +} + // DbfsClient implements the [Filer] interface for the DBFS backend. type DbfsClient struct { workspaceClient *databricks.WorkspaceClient @@ -152,7 +199,7 @@ func (w *DbfsClient) Delete(ctx context.Context, name string) error { }) } -func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { +func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -175,17 +222,13 @@ func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]FileInfo, erro return nil, err } - info := make([]FileInfo, len(res.Files)) + info := make([]fs.DirEntry, len(res.Files)) for i, v := range res.Files { - info[i] = FileInfo{ - Name: path.Base(v.Path), - Size: v.FileSize, - ModTime: time.UnixMilli(v.ModificationTime), - } + info[i] = dbfsDirEntry{dbfsFileInfo: dbfsFileInfo{fi: v}} } // Sort by name for parity with os.ReadDir. - sort.Slice(info, func(i, j int) bool { return info[i].Name < info[j].Name }) + sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() }) return info, nil } diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 88de7e466..61412b97e 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -4,7 +4,7 @@ import ( "context" "fmt" "io" - "time" + "io/fs" ) type WriteMode int @@ -14,22 +14,6 @@ const ( CreateParentDirectories = iota << 1 ) -// FileInfo abstracts over file information from different file systems. -// Inspired by https://pkg.go.dev/io/fs#FileInfo. -type FileInfo struct { - // The type of the file in workspace. - Type string - - // Base name. - Name string - - // Size in bytes. - Size int64 - - // Modification time. - ModTime time.Time -} - type FileAlreadyExistsError struct { path string } @@ -68,7 +52,7 @@ type Filer interface { Delete(ctx context.Context, path string) error // Return contents of directory at `path`. - ReadDir(ctx context.Context, path string) ([]FileInfo, error) + ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) // Creates directory at `path`, creating any intermediate directories as required. Mkdir(ctx context.Context, path string) error diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index df2c0bdbb..b9f0f3db3 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "io" + "io/fs" "net/http" "net/url" "path" @@ -20,6 +21,53 @@ import ( "golang.org/x/exp/slices" ) +// Type that implements fs.DirEntry for WSFS. +type wsfsDirEntry struct { + wsfsFileInfo +} + +func (entry wsfsDirEntry) Type() fs.FileMode { + return entry.wsfsFileInfo.Mode() +} + +func (entry wsfsDirEntry) Info() (fs.FileInfo, error) { + return entry.wsfsFileInfo, nil +} + +// Type that implements fs.FileInfo for WSFS. +type wsfsFileInfo struct { + oi workspace.ObjectInfo +} + +func (info wsfsFileInfo) Name() string { + return path.Base(info.oi.Path) +} + +func (info wsfsFileInfo) Size() int64 { + return info.oi.Size +} + +func (info wsfsFileInfo) Mode() fs.FileMode { + switch info.oi.ObjectType { + case workspace.ObjectTypeDirectory: + return fs.ModeDir + default: + return fs.ModePerm + } +} + +func (info wsfsFileInfo) ModTime() time.Time { + return time.UnixMilli(info.oi.ModifiedAt) +} + +func (info wsfsFileInfo) IsDir() bool { + return info.oi.ObjectType == workspace.ObjectTypeDirectory +} + +func (info wsfsFileInfo) Sys() any { + return nil +} + // WorkspaceFilesClient implements the files-in-workspace API. // NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled. @@ -165,7 +213,7 @@ func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error { return err } -func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { +func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err @@ -189,18 +237,13 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]File return nil, err } - info := make([]FileInfo, len(objects)) + info := make([]fs.DirEntry, len(objects)) for i, v := range objects { - info[i] = FileInfo{ - Type: string(v.ObjectType), - Name: path.Base(v.Path), - Size: v.Size, - ModTime: time.UnixMilli(v.ModifiedAt), - } + info[i] = wsfsDirEntry{wsfsFileInfo{oi: v}} } // Sort by name for parity with os.ReadDir. - sort.Slice(info, func(i, j int) bool { return info[i].Name < info[j].Name }) + sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() }) return info, nil } From 349e2aff40110353a7a6af70a57b76fb75799df0 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 31 May 2023 18:47:00 +0000 Subject: [PATCH 3/5] Allow equivalence checking of filer errors to fs errors (#416) ## Changes The pattern `errors.Is(err, fs.ErrNotExist)` is common to check for an error type. Errors can implement `Is(error) bool` with a custom equivalence checker. ## Tests New asserts all pass in the integration test. --- bundle/deploy/terraform/state_pull.go | 5 +++-- internal/filer_test.go | 5 +++++ libs/filer/filer.go | 12 ++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/bundle/deploy/terraform/state_pull.go b/bundle/deploy/terraform/state_pull.go index cfdeef808..e5a42d89b 100644 --- a/bundle/deploy/terraform/state_pull.go +++ b/bundle/deploy/terraform/state_pull.go @@ -2,14 +2,15 @@ package terraform import ( "context" + "errors" "io" + "io/fs" "os" "path/filepath" "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go/apierr" ) type statePull struct{} @@ -34,7 +35,7 @@ func (l *statePull) Apply(ctx context.Context, b *bundle.Bundle) error { remote, err := f.Read(ctx, TerraformStateFileName) if err != nil { // On first deploy this state file doesn't yet exist. - if apierr.IsMissing(err) { + if errors.Is(err, fs.ErrNotExist) { log.Infof(ctx, "Remote state file does not exist") return nil } diff --git a/internal/filer_test.go b/internal/filer_test.go index 29d08b3eb..b8fd63657 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -46,10 +46,12 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { // Write should fail because the root path doesn't yet exist. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`)) assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{})) + assert.True(t, errors.Is(err, fs.ErrNotExist)) // Read should fail because the root path doesn't yet exist. _, err = f.Read(ctx, "/foo/bar") assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) + assert.True(t, errors.Is(err, fs.ErrNotExist)) // Write with CreateParentDirectories flag should succeed. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories) @@ -59,6 +61,7 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { // Write should fail because there is an existing file at the specified path. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`)) assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{})) + assert.True(t, errors.Is(err, fs.ErrExist)) // Write with OverwriteIfExists should succeed. err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists) @@ -68,6 +71,7 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { // Delete should fail if the file doesn't exist. err = f.Delete(ctx, "/doesnt_exist") assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) + assert.True(t, errors.Is(err, fs.ErrNotExist)) // Delete should succeed for file that does exist. err = f.Delete(ctx, "/foo/bar") @@ -102,6 +106,7 @@ func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { // Expect an error if the path doesn't exist. _, err = f.ReadDir(ctx, "/dir/a/b/c/d/e") assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}), err) + assert.True(t, errors.Is(err, fs.ErrNotExist)) // Expect two entries in the root. entries, err = f.ReadDir(ctx, ".") diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 61412b97e..e54efb96c 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -22,10 +22,18 @@ func (err FileAlreadyExistsError) Error() string { return fmt.Sprintf("file already exists: %s", err.path) } +func (err FileAlreadyExistsError) Is(other error) bool { + return other == fs.ErrExist +} + type FileDoesNotExistError struct { path string } +func (err FileDoesNotExistError) Is(other error) bool { + return other == fs.ErrNotExist +} + func (err FileDoesNotExistError) Error() string { return fmt.Sprintf("file does not exist: %s", err.path) } @@ -38,6 +46,10 @@ func (err NoSuchDirectoryError) Error() string { return fmt.Sprintf("no such directory: %s", err.path) } +func (err NoSuchDirectoryError) Is(other error) bool { + return other == fs.ErrNotExist +} + // Filer is used to access files in a workspace. // It has implementations for accessing files in WSFS and in DBFS. type Filer interface { From 9ae86e3ae3ea3af29efc2f9573a2be868d5ff9ad Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 1 Jun 2023 07:38:03 +0000 Subject: [PATCH 4/5] Fix locker integration test (#417) ## Changes The failure was caused by swapping out the error types returned by the filer in #139. ## Tests Integration tests pass again. --- internal/locker_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/locker_test.go b/internal/locker_test.go index b7cfc1ea1..bc26bdaa4 100644 --- a/internal/locker_test.go +++ b/internal/locker_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "io/fs" "math/rand" "sync" "testing" @@ -142,7 +143,7 @@ func TestAccLock(t *testing.T) { err = lockers[indexOfActiveLocker].Unlock(ctx) assert.NoError(t, err) remoteLocker, err = locker.GetActiveLockState(ctx) - assert.ErrorContains(t, err, "File not found.", "remote lock file not deleted on unlock") + assert.ErrorIs(t, err, fs.ErrNotExist, "remote lock file not deleted on unlock") assert.Nil(t, remoteLocker) assert.False(t, lockers[indexOfActiveLocker].Active) From d44d8ff2dcdc0353c77ff71a2178923d0d756d16 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jun 2023 09:45:54 +0200 Subject: [PATCH 5/5] Bump github.com/mattn/go-isatty from 0.0.18 to 0.0.19 (#412) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index eb9460be2..14a42762d 100644 --- a/go.mod +++ b/go.mod @@ -14,7 +14,7 @@ require ( github.com/hashicorp/terraform-json v0.16.0 // MPL 2.0 github.com/imdario/mergo v0.3.13 // BSD-3-Clause github.com/manifoldco/promptui v0.9.0 // BSD-3-Clause - github.com/mattn/go-isatty v0.0.18 // MIT + github.com/mattn/go-isatty v0.0.19 // MIT github.com/nwidger/jsoncolor v0.3.2 // MIT github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // BSD-2-Clause github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // MIT diff --git a/go.sum b/go.sum index 034bbc003..fe027d6fb 100644 --- a/go.sum +++ b/go.sum @@ -118,8 +118,8 @@ github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovk github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= -github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/nwidger/jsoncolor v0.3.2 h1:rVJJlwAWDJShnbTYOQ5RM7yTA20INyKXlJ/fg4JMhHQ= github.com/nwidger/jsoncolor v0.3.2/go.mod h1:Cs34umxLbJvgBMnVNVqhji9BhoT/N/KinHqZptQ7cf4= github.com/pjbgf/sha1cd v0.3.0 h1:4D5XXmUUBUl/xQ6IjCkEAbqXskkq/4O7LmGn0AqMDs4=