2022-12-14 14:37:14 +00:00
|
|
|
package filer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"context"
|
2023-02-20 15:00:20 +00:00
|
|
|
"errors"
|
2022-12-14 14:37:14 +00:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"net/http"
|
2023-03-08 13:27:05 +00:00
|
|
|
"net/url"
|
2022-12-14 14:37:14 +00:00
|
|
|
"path"
|
|
|
|
"strings"
|
2023-05-26 09:35:13 +00:00
|
|
|
"time"
|
2022-12-14 14:37:14 +00:00
|
|
|
|
|
|
|
"github.com/databricks/databricks-sdk-go"
|
|
|
|
"github.com/databricks/databricks-sdk-go/apierr"
|
|
|
|
"github.com/databricks/databricks-sdk-go/client"
|
|
|
|
"github.com/databricks/databricks-sdk-go/service/workspace"
|
|
|
|
"golang.org/x/exp/slices"
|
|
|
|
)
|
|
|
|
|
2022-12-15 16:16:07 +00:00
|
|
|
// WorkspaceFilesClient implements the files-in-workspace API.
|
|
|
|
|
|
|
|
// NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled.
|
|
|
|
// It can access any workspace path if files-in-workspace is enabled.
|
2022-12-14 14:37:14 +00:00
|
|
|
type WorkspaceFilesClient struct {
|
|
|
|
workspaceClient *databricks.WorkspaceClient
|
|
|
|
apiClient *client.DatabricksClient
|
|
|
|
|
|
|
|
// File operations will be relative to this path.
|
2022-12-14 22:41:37 +00:00
|
|
|
root RootPath
|
2022-12-14 14:37:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
|
|
|
|
apiClient, err := client.New(w.Config)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &WorkspaceFilesClient{
|
|
|
|
workspaceClient: w,
|
|
|
|
apiClient: apiClient,
|
|
|
|
|
2022-12-14 22:41:37 +00:00
|
|
|
root: NewRootPath(root),
|
2022-12-14 14:37:14 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error {
|
2022-12-14 22:41:37 +00:00
|
|
|
absPath, err := w.root.Join(name)
|
2022-12-14 14:37:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove leading "/" so we can use it in the URL.
|
|
|
|
overwrite := slices.Contains(mode, OverwriteIfExists)
|
|
|
|
urlPath := fmt.Sprintf(
|
|
|
|
"/api/2.0/workspace-files/import-file/%s?overwrite=%t",
|
2023-03-17 16:42:35 +00:00
|
|
|
url.PathEscape(strings.TrimLeft(absPath, "/")),
|
2022-12-14 14:37:14 +00:00
|
|
|
overwrite,
|
|
|
|
)
|
|
|
|
|
|
|
|
// Buffer the file contents because we may need to retry below and we cannot read twice.
|
|
|
|
body, err := io.ReadAll(reader)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = w.apiClient.Do(ctx, http.MethodPost, urlPath, body, nil)
|
|
|
|
|
|
|
|
// If we got an API error we deal with it below.
|
2023-02-20 15:00:20 +00:00
|
|
|
var aerr *apierr.APIError
|
|
|
|
if !errors.As(err, &aerr) {
|
2022-12-14 14:37:14 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// This API returns a 404 if the parent directory does not exist.
|
|
|
|
if aerr.StatusCode == http.StatusNotFound {
|
|
|
|
if !slices.Contains(mode, CreateParentDirectories) {
|
|
|
|
return NoSuchDirectoryError{path.Dir(absPath)}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create parent directory.
|
|
|
|
err = w.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(absPath))
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("unable to mkdir to write file %s: %w", absPath, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Retry without CreateParentDirectories mode flag.
|
|
|
|
return w.Write(ctx, name, bytes.NewReader(body), sliceWithout(mode, CreateParentDirectories)...)
|
|
|
|
}
|
|
|
|
|
|
|
|
// This API returns 409 if the file already exists.
|
|
|
|
if aerr.StatusCode == http.StatusConflict {
|
|
|
|
return FileAlreadyExistsError{absPath}
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader, error) {
|
2022-12-14 22:41:37 +00:00
|
|
|
absPath, err := w.root.Join(name)
|
2022-12-14 14:37:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove leading "/" so we can use it in the URL.
|
|
|
|
urlPath := fmt.Sprintf(
|
|
|
|
"/api/2.0/workspace-files/%s",
|
|
|
|
strings.TrimLeft(absPath, "/"),
|
|
|
|
)
|
|
|
|
|
2023-01-05 11:03:31 +00:00
|
|
|
var res []byte
|
2022-12-14 14:37:14 +00:00
|
|
|
err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return bytes.NewReader(res), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error {
|
2022-12-14 22:41:37 +00:00
|
|
|
absPath, err := w.root.Join(name)
|
2022-12-14 14:37:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{
|
|
|
|
Path: absPath,
|
|
|
|
Recursive: false,
|
|
|
|
})
|
|
|
|
}
|
2023-05-26 09:35:13 +00:00
|
|
|
|
|
|
|
func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]FileInfo, error) {
|
|
|
|
absPath, err := w.root.Join(name)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
objects, err := w.workspaceClient.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{
|
|
|
|
Path: absPath,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-05-26 11:02:50 +00:00
|
|
|
info := make([]FileInfo, len(objects))
|
|
|
|
for i, v := range objects {
|
|
|
|
info[i] = FileInfo{
|
|
|
|
Type: string(v.ObjectType),
|
|
|
|
Name: path.Base(v.Path),
|
|
|
|
Size: v.Size,
|
|
|
|
ModTime: time.UnixMilli(v.ModifiedAt),
|
|
|
|
}
|
2023-05-26 09:35:13 +00:00
|
|
|
}
|
|
|
|
return info, nil
|
|
|
|
}
|
2023-05-26 15:13:45 +00:00
|
|
|
|
|
|
|
func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error {
|
|
|
|
dirPath, err := w.root.Join(name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return w.workspaceClient.Workspace.Mkdirs(ctx, workspace.Mkdirs{
|
|
|
|
Path: dirPath,
|
|
|
|
})
|
|
|
|
}
|