From 0448307b141eaf4488786ad2311b7bf76ac74ee1 Mon Sep 17 00:00:00 2001
From: Pieter Noordhuis
Date: Fri, 19 Jul 2024 09:03:25 +0200
Subject: [PATCH] Add tests for the Workspace API readahead cache (#1605)

## Changes

Backfill unit tests for #1582.

## Tests

New tests pass.
---
 libs/filer/workspace_files_cache_test.go | 319 +++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 libs/filer/workspace_files_cache_test.go

diff --git a/libs/filer/workspace_files_cache_test.go b/libs/filer/workspace_files_cache_test.go
new file mode 100644
index 00000000..8983c598
--- /dev/null
+++ b/libs/filer/workspace_files_cache_test.go
@@ -0,0 +1,319 @@
+package filer
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/fs"
+	"testing"
+
+	"github.com/databricks/databricks-sdk-go/service/workspace"
+	"github.com/stretchr/testify/assert"
+)
+
+var errNotImplemented = fmt.Errorf("not implemented")
+
+type cacheTestFiler struct {
+	calls int
+
+	readDir map[string][]fs.DirEntry
+	stat    map[string]fs.FileInfo
+}
+
+func (m *cacheTestFiler) Write(ctx context.Context, path string, reader io.Reader, mode ...WriteMode) error {
+	return errNotImplemented
+}
+
+func (m *cacheTestFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) {
+	return nil, errNotImplemented
+}
+
+func (m *cacheTestFiler) Delete(ctx context.Context, path string, mode ...DeleteMode) error {
+	return errNotImplemented
+}
+
+func (m *cacheTestFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) {
+	m.calls++
+	if fi, ok := m.readDir[path]; ok {
+		delete(m.readDir, path)
+		return fi, nil
+	}
+	return nil, fs.ErrNotExist
+}
+
+func (m *cacheTestFiler) Mkdir(ctx context.Context, path string) error {
+	return errNotImplemented
+}
+
+func (m *cacheTestFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
+	m.calls++
+	if fi, ok := m.stat[name]; ok {
+		delete(m.stat, name)
+		return fi, nil
+	}
+	return nil, fs.ErrNotExist
+}
+
+func TestWorkspaceFilesCache_ReadDirCache(t *testing.T) {
+	f := &cacheTestFiler{
+		readDir: map[string][]fs.DirEntry{
+			"dir1": {
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file1",
+							Size:       1,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file2",
+							Size:       2,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+	cache := newWorkspaceFilesReadaheadCache(f)
+	defer cache.Cleanup()
+
+	// First read dir should hit the filer, second should hit the cache.
+	for range 2 {
+		fi, err := cache.ReadDir(ctx, "dir1")
+		assert.NoError(t, err)
+		if assert.Len(t, fi, 2) {
+			assert.Equal(t, "file1", fi[0].Name())
+			assert.Equal(t, "file2", fi[1].Name())
+		}
+	}
+
+	// Third read dir should hit the filer, fourth should hit the cache.
+	for range 2 {
+		_, err := cache.ReadDir(ctx, "dir2")
+		assert.ErrorIs(t, err, fs.ErrNotExist)
+	}
+
+	// Assert we only called the filer twice.
+	assert.Equal(t, 2, f.calls)
+}
+
+func TestWorkspaceFilesCache_ReadDirCacheIsolation(t *testing.T) {
+	f := &cacheTestFiler{
+		readDir: map[string][]fs.DirEntry{
+			"dir": {
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file",
+							Size:       1,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	ctx := context.Background()
+	cache := newWorkspaceFilesReadaheadCache(f)
+	defer cache.Cleanup()
+
+	// First read dir should hit the filer, second should hit the cache.
+	entries, err := cache.ReadDir(ctx, "dir")
+	assert.NoError(t, err)
+	assert.Equal(t, "file", entries[0].Name())
+
+	// Modify the entry to check that mutations are not reflected in the cache.
+	entries[0] = wsfsDirEntry{
+		wsfsFileInfo{
+			ObjectInfo: workspace.ObjectInfo{
+				Path: "tainted",
+			},
+		},
+	}
+
+	// Read the directory again to check that the cache is isolated.
+	entries, err = cache.ReadDir(ctx, "dir")
+	assert.NoError(t, err)
+	assert.Equal(t, "file", entries[0].Name())
+}
+
+func TestWorkspaceFilesCache_StatCache(t *testing.T) {
+	f := &cacheTestFiler{
+		stat: map[string]fs.FileInfo{
+			"file1": &wsfsFileInfo{ObjectInfo: workspace.ObjectInfo{Path: "file1", Size: 1}},
+		},
+	}
+
+	ctx := context.Background()
+	cache := newWorkspaceFilesReadaheadCache(f)
+	defer cache.Cleanup()
+
+	// First stat should hit the filer, second should hit the cache.
+	for range 2 {
+		fi, err := cache.Stat(ctx, "file1")
+		if assert.NoError(t, err) {
+			assert.Equal(t, "file1", fi.Name())
+			assert.Equal(t, int64(1), fi.Size())
+		}
+	}
+
+	// Third stat should hit the filer, fourth should hit the cache.
+	for range 2 {
+		_, err := cache.Stat(ctx, "file2")
+		assert.ErrorIs(t, err, fs.ErrNotExist)
+	}
+
+	// Assert we only called the filer twice.
+	assert.Equal(t, 2, f.calls)
+}
+
+func TestWorkspaceFilesCache_ReadDirPopulatesStatCache(t *testing.T) {
+	f := &cacheTestFiler{
+		readDir: map[string][]fs.DirEntry{
+			"dir1": {
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file1",
+							Size:       1,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file2",
+							Size:       2,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "notebook1",
+							Size:       1,
+							ObjectType: workspace.ObjectTypeNotebook,
+						},
+						ReposExportFormat: "this should not end up in the stat cache",
+					},
+				},
+			},
+		},
+		stat: map[string]fs.FileInfo{
+			"dir1/notebook1": wsfsFileInfo{
+				ObjectInfo: workspace.ObjectInfo{
+					Path:       "notebook1",
+					Size:       1,
+					ObjectType: workspace.ObjectTypeNotebook,
+				},
+				ReposExportFormat: workspace.ExportFormatJupyter,
+			},
+		},
+	}
+
+	ctx := context.Background()
+	cache := newWorkspaceFilesReadaheadCache(f)
+	defer cache.Cleanup()
+
+	// Issue read dir to populate the stat cache.
+	_, err := cache.ReadDir(ctx, "dir1")
+	assert.NoError(t, err)
+
+	// Stat on a file in the directory should hit the cache.
+	fi, err := cache.Stat(ctx, "dir1/file1")
+	if assert.NoError(t, err) {
+		assert.Equal(t, "file1", fi.Name())
+		assert.Equal(t, int64(1), fi.Size())
+	}
+
+	// If the containing directory has been read, absence is also inferred from the cache.
+	_, err = cache.Stat(ctx, "dir1/file3")
+	assert.ErrorIs(t, err, fs.ErrNotExist)
+
+	// Stat on a notebook in the directory should have been performed in the background.
+	fi, err = cache.Stat(ctx, "dir1/notebook1")
+	if assert.NoError(t, err) {
+		assert.Equal(t, "notebook1", fi.Name())
+		assert.Equal(t, int64(1), fi.Size())
+		assert.Equal(t, workspace.ExportFormatJupyter, fi.(wsfsFileInfo).ReposExportFormat)
+	}
+
+	// Assert we called the filer twice (once for read dir, once for stat on the notebook).
+	assert.Equal(t, 2, f.calls)
+}
+
+func TestWorkspaceFilesCache_ReadDirTriggersReadahead(t *testing.T) {
+	f := &cacheTestFiler{
+		readDir: map[string][]fs.DirEntry{
+			"a": {
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "b1",
+							ObjectType: workspace.ObjectTypeDirectory,
+						},
+					},
+				},
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "b2",
+							ObjectType: workspace.ObjectTypeDirectory,
+						},
+					},
+				},
+			},
+			"a/b1": {
+				wsfsDirEntry{
+					wsfsFileInfo{
+						ObjectInfo: workspace.ObjectInfo{
+							Path:       "file1",
+							Size:       1,
+							ObjectType: workspace.ObjectTypeFile,
+						},
+					},
+				},
+			},
+			"a/b2": {},
+		},
+	}
+
+	ctx := context.Background()
+	cache := newWorkspaceFilesReadaheadCache(f)
+	defer cache.Cleanup()
+
+	// Issue read dir to populate the caches and trigger readahead of the subdirectories.
+	_, err := cache.ReadDir(ctx, "a")
+	assert.NoError(t, err)
+
+	// Stat on a directory in the directory should hit the cache.
+	fi, err := cache.Stat(ctx, "a/b1")
+	if assert.NoError(t, err) {
+		assert.Equal(t, "b1", fi.Name())
+		assert.True(t, fi.IsDir())
+	}
+
+	// Stat on a file in a nested directory should hit the cache.
+	fi, err = cache.Stat(ctx, "a/b1/file1")
+	if assert.NoError(t, err) {
+		assert.Equal(t, "file1", fi.Name())
+		assert.Equal(t, int64(1), fi.Size())
+	}
+
+	// Stat on a non-existing file in an empty nested directory should hit the cache.
+	_, err = cache.Stat(ctx, "a/b2/file2")
+	assert.ErrorIs(t, err, fs.ErrNotExist)
+
+	// Assert we called the filer 3 times; once for each directory.
+	assert.Equal(t, 3, f.calls)
+}
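For quick reference, the call pattern these tests exercise can be condensed into one additional sketch. It is illustrative only and not part of the patch: it would live in the same file and package as the tests above, so it reuses the `cacheTestFiler` mock, the unexported `newWorkspaceFilesReadaheadCache` constructor, and the existing imports. The final call-count assertion is an assumption based on what `TestWorkspaceFilesCache_ReadDirPopulatesStatCache` asserts, namely that plain files trigger no background work.

```go
// Illustrative sketch only; not part of the patch above. Assumes it is
// placed alongside the tests in workspace_files_cache_test.go.
func TestWorkspaceFilesCache_UsageSketch(t *testing.T) {
	f := &cacheTestFiler{
		readDir: map[string][]fs.DirEntry{
			"dir": {
				wsfsDirEntry{
					wsfsFileInfo{
						ObjectInfo: workspace.ObjectInfo{
							Path:       "file",
							Size:       1,
							ObjectType: workspace.ObjectTypeFile,
						},
					},
				},
			},
		},
	}

	ctx := context.Background()
	cache := newWorkspaceFilesReadaheadCache(f)
	defer cache.Cleanup()

	// A single read dir goes to the underlying filer and fills both the
	// directory cache and the per-entry stat cache.
	entries, err := cache.ReadDir(ctx, "dir")
	assert.NoError(t, err)
	assert.Len(t, entries, 1)

	// Stat for an entry of "dir" is answered from the cache, without
	// another round trip to the filer.
	fi, err := cache.Stat(ctx, "dir/file")
	if assert.NoError(t, err) {
		assert.Equal(t, "file", fi.Name())
		assert.Equal(t, int64(1), fi.Size())
	}

	// Assumption based on the tests above: plain files trigger no
	// background stat calls, so the filer saw exactly one call.
	assert.Equal(t, 1, f.calls)
}
```

The cache acts as a decorator over the `Filer` interface; `Cleanup` presumably stops whatever background readahead is still in flight, which is why every test defers it right after construction.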