package filer import ( "context" "errors" "io" "net/http" "path" "sort" "time" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/files" "golang.org/x/exp/slices" ) // DbfsClient implements the [Filer] interface for the DBFS backend. type DbfsClient struct { workspaceClient *databricks.WorkspaceClient // File operations will be relative to this path. root RootPath } func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) { return &DbfsClient{ workspaceClient: w, root: NewRootPath(root), }, nil } func (w *DbfsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { absPath, err := w.root.Join(name) if err != nil { return err } fileMode := files.FileModeWrite if slices.Contains(mode, OverwriteIfExists) { fileMode |= files.FileModeOverwrite } // Issue info call before write because it automatically creates parent directories. // // For discussion: we could decide this is actually convenient, remove the call below, // and apply the same semantics for the WSFS filer. // if !slices.Contains(mode, CreateParentDirectories) { _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, path.Dir(absPath)) if err != nil { var aerr *apierr.APIError if !errors.As(err, &aerr) { return err } // This API returns a 404 if the file doesn't exist. if aerr.StatusCode == http.StatusNotFound { if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { return NoSuchDirectoryError{path.Dir(absPath)} } } return err } } handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, fileMode) if err != nil { var aerr *apierr.APIError if !errors.As(err, &aerr) { return err } // This API returns a 400 if the file already exists. if aerr.StatusCode == http.StatusBadRequest { if aerr.ErrorCode == "RESOURCE_ALREADY_EXISTS" { return FileAlreadyExistsError{absPath} } } return err } _, err = io.Copy(handle, reader) cerr := handle.Close() if err == nil { err = cerr } return err } func (w *DbfsClient) Read(ctx context.Context, name string) (io.Reader, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err } handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) if err != nil { var aerr *apierr.APIError if !errors.As(err, &aerr) { return nil, err } // This API returns a 404 if the file doesn't exist. if aerr.StatusCode == http.StatusNotFound { if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { return nil, FileDoesNotExistError{absPath} } } return nil, err } return handle, nil } func (w *DbfsClient) Delete(ctx context.Context, name string) error { absPath, err := w.root.Join(name) if err != nil { return err } // Issue info call before delete because delete succeeds if the specified path doesn't exist. // // For discussion: we could decide this is actually convenient, remove the call below, // and apply the same semantics for the WSFS filer. // _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) if err != nil { var aerr *apierr.APIError if !errors.As(err, &aerr) { return err } // This API returns a 404 if the file doesn't exist. if aerr.StatusCode == http.StatusNotFound { if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { return FileDoesNotExistError{absPath} } } return err } return w.workspaceClient.Dbfs.Delete(ctx, files.Delete{ Path: absPath, Recursive: false, }) } func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) { absPath, err := w.root.Join(name) if err != nil { return nil, err } res, err := w.workspaceClient.Dbfs.ListByPath(ctx, absPath) if err != nil { var aerr *apierr.APIError if !errors.As(err, &aerr) { return nil, err } // This API returns a 404 if the file doesn't exist. if aerr.StatusCode == http.StatusNotFound { if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { return nil, NoSuchDirectoryError{absPath} } } return nil, err } info := make([]FileInfo, len(res.Files)) for i, v := range res.Files { info[i] = FileInfo{ Name: path.Base(v.Path), Size: v.FileSize, ModTime: time.UnixMilli(v.ModificationTime), } } // Sort by name for parity with os.ReadDir. sort.Slice(info, func(i, j int) bool { return info[i].Name < info[j].Name }) return info, nil } func (w *DbfsClient) Mkdir(ctx context.Context, name string) error { dirPath, err := w.root.Join(name) if err != nil { return err } return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath) }