// databricks-cli/libs/filer/workspace_files_cache_test.go
//
// 320 lines
// 7.7 KiB
// Go

package filer
import (
"context"
"fmt"
"io"
"io/fs"
"testing"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/stretchr/testify/assert"
)
// errNotImplemented is the sentinel error returned by the cacheTestFiler
// methods that the tests in this file do not exercise.
var (
	errNotImplemented = fmt.Errorf("not implemented")
)
// cacheTestFiler is a test double whose ReadDir and Stat methods answer from
// the maps below and count how many times they were invoked.
type cacheTestFiler struct {
// calls is incremented on every ReadDir and Stat invocation.
calls int
// readDir holds the directory listings ReadDir may return, keyed by path.
readDir map[string][]fs.DirEntry
// stat holds the file infos Stat may return, keyed by name.
stat map[string]fs.FileInfo
}
// Write is a stub: it ignores its arguments and always returns
// errNotImplemented.
// NOTE(review): presumably present only to satisfy the filer interface — confirm.
func (m *cacheTestFiler) Write(ctx context.Context, path string, reader io.Reader, mode ...WriteMode) error {
return errNotImplemented
}
// Read is a stub: it ignores its arguments and always returns a nil reader
// together with errNotImplemented.
// NOTE(review): presumably present only to satisfy the filer interface — confirm.
func (m *cacheTestFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) {
return nil, errNotImplemented
}
// Delete is a stub: it ignores its arguments and always returns
// errNotImplemented.
// NOTE(review): presumably present only to satisfy the filer interface — confirm.
func (m *cacheTestFiler) Delete(ctx context.Context, path string, mode ...DeleteMode) error {
return errNotImplemented
}
// ReadDir counts the call and returns the listing configured for path.
//
// A configured listing is removed from the map once it has been returned, so
// a later call for the same path — like a call for a path that was never
// configured — returns fs.ErrNotExist.
func (m *cacheTestFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) {
	m.calls++

	listing, found := m.readDir[path]
	if !found {
		return nil, fs.ErrNotExist
	}

	// Serve each configured listing at most once.
	delete(m.readDir, path)
	return listing, nil
}
// Mkdir is a stub: it ignores its arguments and always returns
// errNotImplemented.
// NOTE(review): presumably present only to satisfy the filer interface — confirm.
func (m *cacheTestFiler) Mkdir(ctx context.Context, path string) error {
return errNotImplemented
}
// Stat counts the call and returns the file info configured for name.
//
// A configured file info is removed from the map once it has been returned,
// so a later call for the same name — like a call for a name that was never
// configured — returns fs.ErrNotExist.
func (m *cacheTestFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
	m.calls++

	info, found := m.stat[name]
	if !found {
		return nil, fs.ErrNotExist
	}

	// Serve each configured file info at most once.
	delete(m.stat, name)
	return info, nil
}
func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) {
f := &cacheTestFiler{
readDir: map[string][]fs.DirEntry{
"dir1": {
wsfsDirEntry{
wsfsFileInfo{
ObjectInfo: workspace.ObjectInfo{
Path: "file1",
Size: 1,
ObjectType: workspace.ObjectTypeFile,
},
},
},
wsfsDirEntry{
wsfsFileInfo{
ObjectInfo: workspace.ObjectInfo{
Path: "file2",
Size: 2,
ObjectType: workspace.ObjectTypeFile,
},
},
},
},
},
}
ctx := context.Background()
cache := newWorkspaceFilesReadaheadCache(f)
defer cache.Cleanup()
// First read dir should hit the filer, second should hit the cache.
for range 2 {
fi, err := cache.ReadDir(ctx, "dir1")
assert.NoError(t, err)
if assert.Len(t, fi, 2) {
assert.Equal(t, "file1", fi[0].Name())
assert.Equal(t, "file2", fi[1].Name())
}
}
// Third stat should hit the filer, fourth should hit the cache.
for range 2 {
_, err := cache.ReadDir(ctx, "dir2")
assert.ErrorIs(t, err, fs.ErrNotExist)
}
// Assert we only called the filer twice.
assert.Equal(t, 2, f.calls)
}
// TestWorkspaceFilesCache_ReadDirCacheIsolation checks that overwriting an
// element of the slice returned by ReadDir does not change what a later
// ReadDir call for the same path returns.
func TestWorkspaceFilesCache_ReadDirCacheIsolation(t *testing.T) {
	f := &cacheTestFiler{
		readDir: map[string][]fs.DirEntry{
			"dir": {
				wsfsDirEntry{
					wsfsFileInfo{
						ObjectInfo: workspace.ObjectInfo{
							Path:       "file",
							Size:       1,
							ObjectType: workspace.ObjectTypeFile,
						},
					},
				},
			},
		},
	}

	ctx := context.Background()
	cache := newWorkspaceFilesReadaheadCache(f)
	defer cache.Cleanup()

	// Read the directory once to obtain a slice we can tamper with.
	entries, err := cache.ReadDir(ctx, "dir")
	assert.NoError(t, err)
	// Guard the index accesses below: without an entry the rest of the test
	// cannot run, and indexing an empty slice would panic instead of failing.
	if !assert.Len(t, entries, 1) {
		return
	}
	assert.Equal(t, "file", entries[0].Name())

	// Modify the entry to check that mutations are not reflected in the cache.
	entries[0] = wsfsDirEntry{
		wsfsFileInfo{
			ObjectInfo: workspace.ObjectInfo{
				Path: "tainted",
			},
		},
	}

	// Read the directory again to check that the cache is isolated.
	entries, err = cache.ReadDir(ctx, "dir")
	assert.NoError(t, err)
	if assert.Len(t, entries, 1) {
		assert.Equal(t, "file", entries[0].Name())
	}
}
func TestWorkspaceFilesCache_StatCache(t *testing.T) {
f := &cacheTestFiler{
stat: map[string]fs.FileInfo{
"file1": &wsfsFileInfo{ObjectInfo: workspace.ObjectInfo{Path: "file1", Size: 1}},
},
}
ctx := context.Background()
cache := newWorkspaceFilesReadaheadCache(f)
defer cache.Cleanup()
// First stat should hit the filer, second should hit the cache.
for range 2 {
fi, err := cache.Stat(ctx, "file1")
if assert.NoError(t, err) {
assert.Equal(t, "file1", fi.Name())
assert.Equal(t, int64(1), fi.Size())
}
}
// Third stat should hit the filer, fourth should hit the cache.
for range 2 {
_, err := cache.Stat(ctx, "file2")
assert.ErrorIs(t, err, fs.ErrNotExist)
}
// Assert we only called the filer twice.
assert.Equal(t, 2, f.calls)
}
// TestWorkspaceFilesCache_ReadDirPopulatesStatCache checks that, after a
// ReadDir on a directory, Stat calls for paths inside that directory are
// answered with only one additional filer call (the stat of the notebook).
func TestWorkspaceFilesCache_ReadDirPopulatesStatCache(t *testing.T) {
	f := &cacheTestFiler{
		readDir: map[string][]fs.DirEntry{
			"dir1": {
				wsfsDirEntry{
					wsfsFileInfo{
						ObjectInfo: workspace.ObjectInfo{
							Path:       "file1",
							Size:       1,
							ObjectType: workspace.ObjectTypeFile,
						},
					},
				},
				wsfsDirEntry{
					wsfsFileInfo{
						ObjectInfo: workspace.ObjectInfo{
							Path:       "file2",
							Size:       2,
							ObjectType: workspace.ObjectTypeFile,
						},
					},
				},
				wsfsDirEntry{
					wsfsFileInfo{
						ObjectInfo: workspace.ObjectInfo{
							Path:       "notebook1",
							Size:       1,
							ObjectType: workspace.ObjectTypeNotebook,
						},
						// Marker value: the assertion at the end of the test
						// expects the value served by Stat, not this one.
						ReposExportFormat: "this should not end up in the stat cache",
					},
				},
			},
		},
		stat: map[string]fs.FileInfo{
			"dir1/notebook1": wsfsFileInfo{
				ObjectInfo: workspace.ObjectInfo{
					Path:       "notebook1",
					Size:       1,
					ObjectType: workspace.ObjectTypeNotebook,
				},
				ReposExportFormat: workspace.ExportFormatJupyter,
			},
		},
	}

	ctx := context.Background()
	cache := newWorkspaceFilesReadaheadCache(f)
	defer cache.Cleanup()

	// Issue read dir to populate the stat cache.
	_, err := cache.ReadDir(ctx, "dir1")
	assert.NoError(t, err)

	// Stat on a file in the directory should hit the cache.
	fi, err := cache.Stat(ctx, "dir1/file1")
	if assert.NoError(t, err) {
		assert.Equal(t, "file1", fi.Name())
		assert.Equal(t, int64(1), fi.Size())
	}

	// If the containing directory has been read, absence is also inferred from the cache.
	_, err = cache.Stat(ctx, "dir1/file3")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Stat on a notebook in the directory should have been performed in the background.
	fi, err = cache.Stat(ctx, "dir1/notebook1")
	if assert.NoError(t, err) {
		assert.Equal(t, "notebook1", fi.Name())
		assert.Equal(t, int64(1), fi.Size())
		// Use the comma-ok form so an unexpected concrete type is reported as
		// a test failure instead of panicking.
		if info, ok := fi.(wsfsFileInfo); assert.True(t, ok, "unexpected file info type %T", fi) {
			assert.Equal(t, workspace.ExportFormatJupyter, info.ReposExportFormat)
		}
	}

	// Assert we called the filer twice (once for read dir, once for stat on the notebook).
	assert.Equal(t, 2, f.calls)
}
func TestWorkspaceFilesCache_ReadDirTriggersReadahead(t *testing.T) {
f := &cacheTestFiler{
readDir: map[string][]fs.DirEntry{
"a": {
wsfsDirEntry{
wsfsFileInfo{
ObjectInfo: workspace.ObjectInfo{
Path: "b1",
ObjectType: workspace.ObjectTypeDirectory,
},
},
},
wsfsDirEntry{
wsfsFileInfo{
ObjectInfo: workspace.ObjectInfo{
Path: "b2",
ObjectType: workspace.ObjectTypeDirectory,
},
},
},
},
"a/b1": {
wsfsDirEntry{
wsfsFileInfo{
ObjectInfo: workspace.ObjectInfo{
Path: "file1",
Size: 1,
ObjectType: workspace.ObjectTypeFile,
},
},
},
},
"a/b2": {},
},
}
ctx := context.Background()
cache := newWorkspaceFilesReadaheadCache(f)
defer cache.Cleanup()
// Issue read dir to populate the stat cache.
_, err := cache.ReadDir(ctx, "a")
assert.NoError(t, err)
// Stat on a directory in the directory should hit the cache.
fi, err := cache.Stat(ctx, "a/b1")
if assert.NoError(t, err) {
assert.Equal(t, "b1", fi.Name())
assert.True(t, fi.IsDir())
}
// Stat on a file in a nested directory should hit the cache.
fi, err = cache.Stat(ctx, "a/b1/file1")
if assert.NoError(t, err) {
assert.Equal(t, "file1", fi.Name())
assert.Equal(t, int64(1), fi.Size())
}
// Stat on a non-existing file in an empty nested directory should hit the cache.
_, err = cache.Stat(ctx, "a/b2/file2")
assert.ErrorIs(t, err, fs.ErrNotExist)
// Assert we called the filer 3 times; once for each directory.
assert.Equal(t, 3, f.calls)
}