mirror of https://github.com/databricks/cli.git
Abstract over file handling with WSFS or DBFS through filer interface (#135)
This commit is contained in:
parent
fa458406ea
commit
12aae35519
|
@ -0,0 +1,112 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/bricks/libs/filer"
|
||||
"github.com/databricks/databricks-sdk-go"
|
||||
"github.com/databricks/databricks-sdk-go/apierr"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type filerTest struct {
|
||||
*testing.T
|
||||
filer.Filer
|
||||
}
|
||||
|
||||
func (f filerTest) assertContents(ctx context.Context, name string, contents string) {
|
||||
reader, err := f.Read(ctx, name)
|
||||
if !assert.NoError(f, err) {
|
||||
return
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(reader)
|
||||
if !assert.NoError(f, err) {
|
||||
return
|
||||
}
|
||||
|
||||
assert.Equal(f, contents, string(body))
|
||||
}
|
||||
|
||||
func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string {
|
||||
ctx := context.Background()
|
||||
me, err := w.CurrentUser.Me(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
path := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("wsfs-files-"))
|
||||
|
||||
// Ensure directory exists, but doesn't exist YET!
|
||||
// Otherwise we could inadvertently remove a directory that already exists on cleanup.
|
||||
t.Logf("mkdir %s", path)
|
||||
err = w.Workspace.MkdirsByPath(ctx, path)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Remove test directory on test completion.
|
||||
t.Cleanup(func() {
|
||||
t.Logf("rm -rf %s", path)
|
||||
err := w.Workspace.Delete(ctx, workspace.Delete{
|
||||
Path: path,
|
||||
Recursive: true,
|
||||
})
|
||||
if err == nil || apierr.IsMissing(err) {
|
||||
return
|
||||
}
|
||||
t.Logf("unable to remove temporary workspace path %s: %#v", path, err)
|
||||
})
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
func TestAccFilerWorkspaceFiles(t *testing.T) {
|
||||
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
|
||||
|
||||
ctx := context.Background()
|
||||
w := databricks.Must(databricks.NewWorkspaceClient())
|
||||
tmpdir := temporaryWorkspaceDir(t, w)
|
||||
f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Check if we can use this API here, skip test if we cannot.
|
||||
_, err = f.Read(ctx, "we_use_this_call_to_test_if_this_api_is_enabled")
|
||||
if apierr, ok := err.(apierr.APIError); ok && apierr.StatusCode == http.StatusBadRequest {
|
||||
t.Skip(apierr.Message)
|
||||
}
|
||||
|
||||
// Write should fail because the root path doesn't yet exist.
|
||||
err = f.Write(ctx, "/foo/bar", strings.NewReader(`"hello world"`))
|
||||
assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}))
|
||||
|
||||
// Read should fail because the root path doesn't yet exist.
|
||||
_, err = f.Read(ctx, "/foo/bar")
|
||||
assert.True(t, apierr.IsMissing(err))
|
||||
|
||||
// Write with CreateParentDirectories flag should succeed.
|
||||
err = f.Write(ctx, "/foo/bar", strings.NewReader(`"hello world"`), filer.CreateParentDirectories)
|
||||
assert.NoError(t, err)
|
||||
filerTest{t, f}.assertContents(ctx, "/foo/bar", `"hello world"`)
|
||||
|
||||
// Write should fail because there is an existing file at the specified path.
|
||||
err = f.Write(ctx, "/foo/bar", strings.NewReader(`"hello universe"`))
|
||||
assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{}))
|
||||
|
||||
// Write with OverwriteIfExists should succeed.
|
||||
err = f.Write(ctx, "/foo/bar", strings.NewReader(`"hello universe"`), filer.OverwriteIfExists)
|
||||
assert.NoError(t, err)
|
||||
filerTest{t, f}.assertContents(ctx, "/foo/bar", `"hello universe"`)
|
||||
|
||||
// Delete should fail if the file doesn't exist.
|
||||
err = f.Delete(ctx, "/doesnt_exist")
|
||||
assert.True(t, apierr.IsMissing(err))
|
||||
|
||||
// Delete should succeed for file that does exist.
|
||||
err = f.Delete(ctx, "/foo/bar")
|
||||
assert.NoError(t, err)
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
package filer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
type WriteMode int
|
||||
|
||||
const (
|
||||
OverwriteIfExists WriteMode = iota
|
||||
CreateParentDirectories = iota << 1
|
||||
)
|
||||
|
||||
type FileAlreadyExistsError struct {
|
||||
path string
|
||||
}
|
||||
|
||||
func (err FileAlreadyExistsError) Error() string {
|
||||
return fmt.Sprintf("file already exists: %s", err.path)
|
||||
}
|
||||
|
||||
type NoSuchDirectoryError struct {
|
||||
path string
|
||||
}
|
||||
|
||||
func (err NoSuchDirectoryError) Error() string {
|
||||
return fmt.Sprintf("no such directory: %s", err.path)
|
||||
}
|
||||
|
||||
// Filer is used to access files in a workspace.
|
||||
// It has implementations for accessing files in WSFS and in DBFS.
|
||||
type Filer interface {
|
||||
// Write file at `path`.
|
||||
// Use the mode to further specify behavior.
|
||||
Write(ctx context.Context, path string, reader io.Reader, mode ...WriteMode) error
|
||||
|
||||
// Read file at `path`.
|
||||
Read(ctx context.Context, path string) (io.Reader, error)
|
||||
|
||||
// Delete file at `path`.
|
||||
Delete(ctx context.Context, path string) error
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
package filer
|
||||
|
||||
import "golang.org/x/exp/slices"
|
||||
|
||||
// sliceWithout returns a copy of the specified slice without element e, if it is present.
|
||||
func sliceWithout[S []E, E comparable](s S, e E) S {
|
||||
s_ := slices.Clone(s)
|
||||
i := slices.Index(s_, e)
|
||||
if i >= 0 {
|
||||
s_ = slices.Delete(s_, i, i+1)
|
||||
}
|
||||
return s_
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
package filer
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSliceWithout(t *testing.T) {
|
||||
assert.Equal(t, []int{}, sliceWithout([]int{}, 0))
|
||||
assert.Equal(t, []int{1, 2, 3}, sliceWithout([]int{1, 2, 3}, 4))
|
||||
assert.Equal(t, []int{2, 3}, sliceWithout([]int{1, 2, 3}, 1))
|
||||
assert.Equal(t, []int{1, 3}, sliceWithout([]int{1, 2, 3}, 2))
|
||||
assert.Equal(t, []int{1, 2}, sliceWithout([]int{1, 2, 3}, 3))
|
||||
|
||||
}
|
||||
|
||||
func TestSliceWithoutReturnsClone(t *testing.T) {
|
||||
var ints = []int{1, 2, 3}
|
||||
assert.Equal(t, []int{2, 3}, sliceWithout(ints, 1))
|
||||
assert.Equal(t, []int{1, 2, 3}, ints)
|
||||
}
|
|
@ -0,0 +1,143 @@
|
|||
package filer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/databricks-sdk-go"
|
||||
"github.com/databricks/databricks-sdk-go/apierr"
|
||||
"github.com/databricks/databricks-sdk-go/client"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// WorkspaceFilesClient implements the Files-in-Workspace API.
|
||||
type WorkspaceFilesClient struct {
|
||||
workspaceClient *databricks.WorkspaceClient
|
||||
apiClient *client.DatabricksClient
|
||||
|
||||
// File operations will be relative to this path.
|
||||
root string
|
||||
}
|
||||
|
||||
func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
|
||||
apiClient, err := client.New(w.Config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &WorkspaceFilesClient{
|
||||
workspaceClient: w,
|
||||
apiClient: apiClient,
|
||||
|
||||
root: path.Clean(root),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (w *WorkspaceFilesClient) absPath(name string) (string, error) {
|
||||
absPath := path.Join(w.root, name)
|
||||
|
||||
// Don't allow escaping the specified root using relative paths.
|
||||
if !strings.HasPrefix(absPath, w.root) {
|
||||
return "", fmt.Errorf("relative path escapes root: %s", name)
|
||||
}
|
||||
|
||||
// Don't allow name to resolve to the root path.
|
||||
if strings.TrimPrefix(absPath, w.root) == "" {
|
||||
return "", fmt.Errorf("relative path resolves to root: %s", name)
|
||||
}
|
||||
|
||||
return absPath, nil
|
||||
}
|
||||
|
||||
func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error {
|
||||
absPath, err := w.absPath(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove leading "/" so we can use it in the URL.
|
||||
overwrite := slices.Contains(mode, OverwriteIfExists)
|
||||
urlPath := fmt.Sprintf(
|
||||
"/api/2.0/workspace-files/import-file/%s?overwrite=%t",
|
||||
strings.TrimLeft(absPath, "/"),
|
||||
overwrite,
|
||||
)
|
||||
|
||||
// Buffer the file contents because we may need to retry below and we cannot read twice.
|
||||
body, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = w.apiClient.Do(ctx, http.MethodPost, urlPath, body, nil)
|
||||
|
||||
// If we got an API error we deal with it below.
|
||||
aerr, ok := err.(apierr.APIError)
|
||||
if !ok {
|
||||
return err
|
||||
}
|
||||
|
||||
// This API returns a 404 if the parent directory does not exist.
|
||||
if aerr.StatusCode == http.StatusNotFound {
|
||||
if !slices.Contains(mode, CreateParentDirectories) {
|
||||
return NoSuchDirectoryError{path.Dir(absPath)}
|
||||
}
|
||||
|
||||
// Create parent directory.
|
||||
err = w.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(absPath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to mkdir to write file %s: %w", absPath, err)
|
||||
}
|
||||
|
||||
// Retry without CreateParentDirectories mode flag.
|
||||
return w.Write(ctx, name, bytes.NewReader(body), sliceWithout(mode, CreateParentDirectories)...)
|
||||
}
|
||||
|
||||
// This API returns 409 if the file already exists.
|
||||
if aerr.StatusCode == http.StatusConflict {
|
||||
return FileAlreadyExistsError{absPath}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.Reader, error) {
|
||||
absPath, err := w.absPath(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Remove leading "/" so we can use it in the URL.
|
||||
urlPath := fmt.Sprintf(
|
||||
"/api/2.0/workspace-files/%s",
|
||||
strings.TrimLeft(absPath, "/"),
|
||||
)
|
||||
|
||||
// Update to []byte after https://github.com/databricks/databricks-sdk-go/pull/247 is merged.
|
||||
var res json.RawMessage
|
||||
err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return bytes.NewReader(res), nil
|
||||
}
|
||||
|
||||
func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string) error {
|
||||
absPath, err := w.absPath(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{
|
||||
Path: absPath,
|
||||
Recursive: false,
|
||||
})
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package filer
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestWorkspaceFilesClientPaths(t *testing.T) {
|
||||
root := "/some/root/path"
|
||||
f := WorkspaceFilesClient{root: root}
|
||||
|
||||
remotePath, err := f.absPath("a/b/c")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, root+"/a/b/c", remotePath)
|
||||
|
||||
remotePath, err = f.absPath("a/b/../d")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, root+"/a/d", remotePath)
|
||||
|
||||
remotePath, err = f.absPath("a/../c")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, root+"/c", remotePath)
|
||||
|
||||
remotePath, err = f.absPath("a/b/c/.")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, root+"/a/b/c", remotePath)
|
||||
|
||||
remotePath, err = f.absPath("a/b/c/d/./../../f/g")
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, root+"/a/b/f/g", remotePath)
|
||||
|
||||
_, err = f.absPath("..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ..`)
|
||||
|
||||
_, err = f.absPath("a/../..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: a/../..`)
|
||||
|
||||
_, err = f.absPath("./../.")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ./../.`)
|
||||
|
||||
_, err = f.absPath("/./.././..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: /./.././..`)
|
||||
|
||||
_, err = f.absPath("./../.")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ./../.`)
|
||||
|
||||
_, err = f.absPath("./..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ./..`)
|
||||
|
||||
_, err = f.absPath("./../../..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ./../../..`)
|
||||
|
||||
_, err = f.absPath("./../a/./b../../..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ./../a/./b../../..`)
|
||||
|
||||
_, err = f.absPath("../..")
|
||||
assert.ErrorContains(t, err, `relative path escapes root: ../..`)
|
||||
|
||||
_, err = f.absPath(".//a/..//./b/..")
|
||||
assert.ErrorContains(t, err, `relative path resolves to root: .//a/..//./b/..`)
|
||||
|
||||
_, err = f.absPath("a/b/../..")
|
||||
assert.ErrorContains(t, err, "relative path resolves to root: a/b/../..")
|
||||
|
||||
_, err = f.absPath("")
|
||||
assert.ErrorContains(t, err, "relative path resolves to root: ")
|
||||
|
||||
_, err = f.absPath(".")
|
||||
assert.ErrorContains(t, err, "relative path resolves to root: .")
|
||||
|
||||
_, err = f.absPath("/")
|
||||
assert.ErrorContains(t, err, "relative path resolves to root: /")
|
||||
}
|
Loading…
Reference in New Issue