databricks-cli/libs/filer/dbfs_client.go

311 lines
6.9 KiB
Go
Raw Normal View History

package filer
import (
"context"
"errors"
"io"
"io/fs"
"net/http"
"path"
Upgraded Go version to 1.21 (#664) ## Changes Upgraded Go version to 1.21 Upgraded to use `slices` and `slog` from core instead of experimental. Still use `exp/maps` as our code relies on `maps.Keys` which is not part of core package and therefore refactoring required. ### Tests Integration tests passed ``` [DEBUG] Test execution command: /opt/homebrew/opt/go@1.21/bin/go test ./... -json -timeout 1h -run ^TestAcc [DEBUG] Test execution directory: /Users/andrew.nester/cli 2023/08/15 13:20:51 [INFO] ✅ TestAccAlertsCreateErrWhenNoArguments (2.150s) 2023/08/15 13:20:52 [INFO] ✅ TestAccApiGet (0.580s) 2023/08/15 13:20:53 [INFO] ✅ TestAccClustersList (0.900s) 2023/08/15 13:20:54 [INFO] ✅ TestAccClustersGet (0.870s) 2023/08/15 13:21:06 [INFO] ✅ TestAccFilerWorkspaceFilesReadWrite (11.980s) 2023/08/15 13:21:13 [INFO] ✅ TestAccFilerWorkspaceFilesReadDir (7.060s) 2023/08/15 13:21:25 [INFO] ✅ TestAccFilerDbfsReadWrite (12.810s) 2023/08/15 13:21:33 [INFO] ✅ TestAccFilerDbfsReadDir (7.380s) 2023/08/15 13:21:41 [INFO] ✅ TestAccFilerWorkspaceNotebookConflict (7.760s) 2023/08/15 13:21:49 [INFO] ✅ TestAccFilerWorkspaceNotebookWithOverwriteFlag (8.660s) 2023/08/15 13:21:49 [INFO] ✅ TestAccFilerLocalReadWrite (0.020s) 2023/08/15 13:21:49 [INFO] ✅ TestAccFilerLocalReadDir (0.010s) 2023/08/15 13:21:52 [INFO] ✅ TestAccFsCatForDbfs (3.190s) 2023/08/15 13:21:53 [INFO] ✅ TestAccFsCatForDbfsOnNonExistentFile (0.890s) 2023/08/15 13:21:54 [INFO] ✅ TestAccFsCatForDbfsInvalidScheme (0.600s) 2023/08/15 13:21:57 [INFO] ✅ TestAccFsCatDoesNotSupportOutputModeJson (2.960s) 2023/08/15 13:22:28 [INFO] ✅ TestAccFsCpDir (31.480s) 2023/08/15 13:22:43 [INFO] ✅ TestAccFsCpFileToFile (14.530s) 2023/08/15 13:22:58 [INFO] ✅ TestAccFsCpFileToDir (14.610s) 2023/08/15 13:23:29 [INFO] ✅ TestAccFsCpDirToDirFileNotOverwritten (31.810s) 2023/08/15 13:23:47 [INFO] ✅ TestAccFsCpFileToDirFileNotOverwritten (17.500s) 2023/08/15 13:24:04 [INFO] ✅ TestAccFsCpFileToFileFileNotOverwritten (17.260s) 2023/08/15 13:24:37 [INFO] ✅ TestAccFsCpDirToDirWithOverwriteFlag (32.690s) 2023/08/15 13:24:56 [INFO] ✅ TestAccFsCpFileToFileWithOverwriteFlag (19.290s) 2023/08/15 13:25:15 [INFO] ✅ TestAccFsCpFileToDirWithOverwriteFlag (19.230s) 2023/08/15 13:25:17 [INFO] ✅ TestAccFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag (2.010s) 2023/08/15 13:25:18 [INFO] ✅ TestAccFsCpErrorsOnInvalidScheme (0.610s) 2023/08/15 13:25:33 [INFO] ✅ TestAccFsCpSourceIsDirectoryButTargetIsFile (14.900s) 2023/08/15 13:25:37 [INFO] ✅ TestAccFsLsForDbfs (3.770s) 2023/08/15 13:25:41 [INFO] ✅ TestAccFsLsForDbfsWithAbsolutePaths (4.160s) 2023/08/15 13:25:44 [INFO] ✅ TestAccFsLsForDbfsOnFile (2.990s) 2023/08/15 13:25:46 [INFO] ✅ TestAccFsLsForDbfsOnEmptyDir (1.870s) 2023/08/15 13:25:46 [INFO] ✅ TestAccFsLsForDbfsForNonexistingDir (0.850s) 2023/08/15 13:25:47 [INFO] ✅ TestAccFsLsWithoutScheme (0.560s) 2023/08/15 13:25:49 [INFO] ✅ TestAccFsMkdirCreatesDirectory (2.310s) 2023/08/15 13:25:52 [INFO] ✅ TestAccFsMkdirCreatesMultipleDirectories (2.920s) 2023/08/15 13:25:55 [INFO] ✅ TestAccFsMkdirWhenDirectoryAlreadyExists (2.320s) 2023/08/15 13:25:57 [INFO] ✅ TestAccFsMkdirWhenFileExistsAtPath (2.820s) 2023/08/15 13:26:01 [INFO] ✅ TestAccFsRmForFile (4.030s) 2023/08/15 13:26:05 [INFO] ✅ TestAccFsRmForEmptyDirectory (3.530s) 2023/08/15 13:26:08 [INFO] ✅ TestAccFsRmForNonEmptyDirectory (3.190s) 2023/08/15 13:26:09 [INFO] ✅ TestAccFsRmForNonExistentFile (0.830s) 2023/08/15 13:26:13 [INFO] ✅ TestAccFsRmForNonEmptyDirectoryWithRecursiveFlag (3.580s) 2023/08/15 13:26:13 [INFO] ✅ TestAccGitClone (0.800s) 2023/08/15 13:26:14 [INFO] ✅ TestAccGitCloneWithOnlyRepoNameOnAlternateBranch (0.790s) 2023/08/15 13:26:15 [INFO] ✅ TestAccGitCloneErrorsWhenRepositoryDoesNotExist (0.540s) 2023/08/15 13:26:23 [INFO] ✅ TestAccLock (8.630s) 2023/08/15 13:26:27 [INFO] ✅ TestAccLockUnlockWithoutAllowsLockFileNotExist (3.490s) 2023/08/15 13:26:30 [INFO] ✅ TestAccLockUnlockWithAllowsLockFileNotExist (3.130s) 2023/08/15 13:26:39 [INFO] ✅ TestAccSyncFullFileSync (9.370s) 2023/08/15 13:26:50 [INFO] ✅ TestAccSyncIncrementalFileSync (10.390s) 2023/08/15 13:27:00 [INFO] ✅ TestAccSyncNestedFolderSync (10.680s) 2023/08/15 13:27:11 [INFO] ✅ TestAccSyncNestedFolderDoesntFailOnNonEmptyDirectory (10.970s) 2023/08/15 13:27:22 [INFO] ✅ TestAccSyncNestedSpacePlusAndHashAreEscapedSync (10.930s) 2023/08/15 13:27:29 [INFO] ✅ TestAccSyncIncrementalFileOverwritesFolder (7.020s) 2023/08/15 13:27:37 [INFO] ✅ TestAccSyncIncrementalSyncPythonNotebookToFile (7.380s) 2023/08/15 13:27:43 [INFO] ✅ TestAccSyncIncrementalSyncFileToPythonNotebook (6.050s) 2023/08/15 13:27:48 [INFO] ✅ TestAccSyncIncrementalSyncPythonNotebookDelete (5.390s) 2023/08/15 13:27:51 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist (2.570s) 2023/08/15 13:27:56 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableIfRepoExists (5.540s) 2023/08/15 13:27:58 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableInWorkspace (1.840s) 2023/08/15 13:27:59 [INFO] ✅ TestAccWorkspaceList (0.790s) 2023/08/15 13:28:08 [INFO] ✅ TestAccExportDir (8.860s) 2023/08/15 13:28:11 [INFO] ✅ TestAccExportDirDoesNotOverwrite (3.090s) 2023/08/15 13:28:14 [INFO] ✅ TestAccExportDirWithOverwriteFlag (3.500s) 2023/08/15 13:28:23 [INFO] ✅ TestAccImportDir (8.330s) 2023/08/15 13:28:34 [INFO] ✅ TestAccImportDirDoesNotOverwrite (10.970s) 2023/08/15 13:28:44 [INFO] ✅ TestAccImportDirWithOverwriteFlag (10.130s) 2023/08/15 13:28:44 [INFO] ✅ 68/68 passed, 0 failed, 3 skipped ```
2023-08-15 13:50:40 +00:00
"slices"
"sort"
"strings"
"time"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/files"
)
// Type that implements fs.DirEntry for DBFS.
type dbfsDirEntry struct {
dbfsFileInfo
}
func (entry dbfsDirEntry) Type() fs.FileMode {
return entry.Mode()
}
func (entry dbfsDirEntry) Info() (fs.FileInfo, error) {
return entry.dbfsFileInfo, nil
}
// Type that implements fs.FileInfo for DBFS.
type dbfsFileInfo struct {
fi files.FileInfo
}
func (info dbfsFileInfo) Name() string {
return path.Base(info.fi.Path)
}
func (info dbfsFileInfo) Size() int64 {
return info.fi.FileSize
}
func (info dbfsFileInfo) Mode() fs.FileMode {
mode := fs.ModePerm
if info.fi.IsDir {
mode |= fs.ModeDir
}
return mode
}
func (info dbfsFileInfo) ModTime() time.Time {
return time.UnixMilli(info.fi.ModificationTime)
}
func (info dbfsFileInfo) IsDir() bool {
return info.fi.IsDir
}
func (info dbfsFileInfo) Sys() any {
return info.fi
}
// DbfsClient implements the [Filer] interface for the DBFS backend.
type DbfsClient struct {
workspaceClient *databricks.WorkspaceClient
// File operations will be relative to this path.
root WorkspaceRootPath
}
func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
return &DbfsClient{
workspaceClient: w,
root: NewWorkspaceRootPath(root),
}, nil
}
func (w *DbfsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error {
absPath, err := w.root.Join(name)
if err != nil {
return err
}
fileMode := files.FileModeWrite
if slices.Contains(mode, OverwriteIfExists) {
fileMode |= files.FileModeOverwrite
}
// Issue info call before write because it automatically creates parent directories.
//
// For discussion: we could decide this is actually convenient, remove the call below,
// and apply the same semantics for the WSFS filer.
//
if !slices.Contains(mode, CreateParentDirectories) {
_, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, path.Dir(absPath))
if err != nil {
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return err
}
// This API returns a 404 if the file doesn't exist.
if aerr.StatusCode == http.StatusNotFound {
if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
return NoSuchDirectoryError{path.Dir(absPath)}
}
}
return err
}
}
handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, fileMode)
if err != nil {
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return err
}
// This API returns a 400 if the file already exists.
if aerr.StatusCode == http.StatusBadRequest {
if aerr.ErrorCode == "RESOURCE_ALREADY_EXISTS" {
return FileAlreadyExistsError{absPath}
}
}
return err
}
_, err = io.Copy(handle, reader)
cerr := handle.Close()
if err == nil {
err = cerr
}
return err
}
func (w *DbfsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) {
absPath, err := w.root.Join(name)
if err != nil {
return nil, err
}
handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead)
if err != nil {
// Return error if file is a directory
if strings.Contains(err.Error(), "cannot open directory for reading") {
return nil, NotAFile{absPath}
}
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return nil, err
}
// This API returns a 404 if the file doesn't exist.
if aerr.StatusCode == http.StatusNotFound {
if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
return nil, FileDoesNotExistError{absPath}
}
}
return nil, err
}
// A DBFS handle open for reading does not need to be closed.
return io.NopCloser(handle), nil
}
func (w *DbfsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error {
absPath, err := w.root.Join(name)
if err != nil {
return err
}
// Illegal to delete the root path.
if absPath == w.root.rootPath {
return CannotDeleteRootError{}
}
// Issue info call before delete because delete succeeds if the specified path doesn't exist.
//
// For discussion: we could decide this is actually convenient, remove the call below,
// and apply the same semantics for the WSFS filer.
//
_, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath)
if err != nil {
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return err
}
// This API returns a 404 if the file doesn't exist.
if aerr.StatusCode == http.StatusNotFound {
if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
return FileDoesNotExistError{absPath}
}
}
return err
}
recursive := false
if slices.Contains(mode, DeleteRecursively) {
recursive = true
}
err = w.workspaceClient.Dbfs.Delete(ctx, files.Delete{
Path: absPath,
Recursive: recursive,
})
// Return early on success.
if err == nil {
return nil
}
// Special handling of this error only if it is an API error.
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return err
}
switch aerr.StatusCode {
case http.StatusBadRequest:
// Anecdotally, this error is returned when attempting to delete a non-empty directory.
if aerr.ErrorCode == "IO_ERROR" {
return DirectoryNotEmptyError{absPath}
}
}
return err
}
func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) {
absPath, err := w.root.Join(name)
if err != nil {
return nil, err
}
res, err := w.workspaceClient.Dbfs.ListByPath(ctx, absPath)
if err != nil {
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return nil, err
}
// This API returns a 404 if the file doesn't exist.
if aerr.StatusCode == http.StatusNotFound {
if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
return nil, NoSuchDirectoryError{absPath}
}
}
return nil, err
}
if len(res.Files) == 1 && res.Files[0].Path == absPath {
return nil, NotADirectory{absPath}
}
info := make([]fs.DirEntry, len(res.Files))
for i, v := range res.Files {
info[i] = dbfsDirEntry{dbfsFileInfo: dbfsFileInfo{fi: v}}
}
// Sort by name for parity with os.ReadDir.
sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() })
return info, nil
}
func (w *DbfsClient) Mkdir(ctx context.Context, name string) error {
dirPath, err := w.root.Join(name)
if err != nil {
return err
}
return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath)
}
func (w *DbfsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
absPath, err := w.root.Join(name)
if err != nil {
return nil, err
}
info, err := w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath)
if err != nil {
var aerr *apierr.APIError
if !errors.As(err, &aerr) {
return nil, err
}
// This API returns a 404 if the file doesn't exist.
if aerr.StatusCode == http.StatusNotFound {
if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
return nil, FileDoesNotExistError{absPath}
}
}
return nil, err
}
return dbfsFileInfo{*info}, nil
}