diff --git a/cmd/sync/snapshot_test.go b/cmd/sync/snapshot_test.go index 1fedb9fe..933a1b2b 100644 --- a/cmd/sync/snapshot_test.go +++ b/cmd/sync/snapshot_test.go @@ -9,64 +9,11 @@ import ( "time" "github.com/databricks/bricks/git" + "github.com/databricks/bricks/libs/testfile" "github.com/databricks/bricks/project" "github.com/stretchr/testify/assert" ) -type testFile struct { - mtime time.Time - fd *os.File - path string - // to make close idempotent - isOpen bool -} - -func createFile(t *testing.T, path string) *testFile { - f, err := os.Create(path) - assert.NoError(t, err) - - fileInfo, err := os.Stat(path) - assert.NoError(t, err) - - return &testFile{ - path: path, - fd: f, - mtime: fileInfo.ModTime(), - isOpen: true, - } -} - -func (f *testFile) close(t *testing.T) { - if f.isOpen { - err := f.fd.Close() - assert.NoError(t, err) - f.isOpen = false - } -} - -func (f *testFile) overwrite(t *testing.T, s string) { - err := os.Truncate(f.path, 0) - assert.NoError(t, err) - - _, err = f.fd.Seek(0, 0) - assert.NoError(t, err) - - _, err = f.fd.WriteString(s) - assert.NoError(t, err) - - // We manually update mtime after write because github actions file - // system does not :') - err = os.Chtimes(f.path, f.mtime.Add(time.Minute), f.mtime.Add(time.Minute)) - assert.NoError(t, err) - f.mtime = f.mtime.Add(time.Minute) -} - -func (f *testFile) remove(t *testing.T) { - f.close(t) - err := os.Remove(f.path) - assert.NoError(t, err) -} - func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string) { keys := make([]string, len(m)) i := 0 @@ -87,11 +34,11 @@ func TestDiff(t *testing.T) { RemoteToLocalNames: make(map[string]string), } - f1 := createFile(t, filepath.Join(projectDir, "hello.txt")) - defer f1.close(t) + f1 := testfile.CreateFile(t, filepath.Join(projectDir, "hello.txt")) + defer f1.Close(t) worldFilePath := filepath.Join(projectDir, "world.txt") - f2 := createFile(t, worldFilePath) - defer f2.close(t) + f2 := testfile.CreateFile(t, worldFilePath) + defer f2.Close(t) // New files are put files, err := fileSet.All() @@ -107,7 +54,7 @@ func TestDiff(t *testing.T) { assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames) // world.txt is editted - f2.overwrite(t, "bunnies are cute.") + f2.Overwrite(t, "bunnies are cute.") assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) @@ -122,7 +69,7 @@ func TestDiff(t *testing.T) { assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames) // hello.txt is deleted - f1.remove(t) + f1.Remove(t) assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) @@ -148,9 +95,9 @@ func TestFolderDiff(t *testing.T) { err := os.Mkdir(filepath.Join(projectDir, "foo"), os.ModePerm) assert.NoError(t, err) - f1 := createFile(t, filepath.Join(projectDir, "foo", "bar.py")) - defer f1.close(t) - f1.overwrite(t, "# Databricks notebook source\nprint(\"abc\")") + f1 := testfile.CreateFile(t, filepath.Join(projectDir, "foo", "bar.py")) + defer f1.Close(t) + f1.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") files, err := fileSet.All() assert.NoError(t, err) @@ -160,7 +107,7 @@ func TestFolderDiff(t *testing.T) { assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo/bar.py") - f1.remove(t) + f1.Remove(t) files, err = fileSet.All() assert.NoError(t, err) change, err = state.diff(files) @@ -180,13 +127,13 @@ func TestPythonNotebookDiff(t *testing.T) { RemoteToLocalNames: make(map[string]string), } - foo := createFile(t, filepath.Join(projectDir, "foo.py")) - defer foo.close(t) + foo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py")) + defer foo.Close(t) // Case 1: notebook foo.py is uploaded files, err := fileSet.All() assert.NoError(t, err) - foo.overwrite(t, "# Databricks notebook source\nprint(\"abc\")") + foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") change, err := state.diff(files) assert.NoError(t, err) assert.Len(t, change.delete, 0) @@ -198,7 +145,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Case 2: notebook foo.py is converted to python script by removing // magic keyword - foo.overwrite(t, "print(\"abc\")") + foo.Overwrite(t, "print(\"abc\")") files, err = fileSet.All() assert.NoError(t, err) change, err = state.diff(files) @@ -212,7 +159,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.RemoteToLocalNames) // Case 3: Python script foo.py is converted to a databricks notebook - foo.overwrite(t, "# Databricks notebook source\nprint(\"def\")") + foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) change, err = state.diff(files) @@ -226,7 +173,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames) // Case 4: Python notebook foo.py is deleted, and its remote name is used in change.delete - foo.remove(t) + foo.Remove(t) assert.NoError(t, err) files, err = fileSet.All() assert.NoError(t, err) @@ -251,10 +198,10 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { } // upload should work since they point to different destinations - pythonFoo := createFile(t, filepath.Join(projectDir, "foo.py")) - defer pythonFoo.close(t) - vanillaFoo := createFile(t, filepath.Join(projectDir, "foo")) - defer vanillaFoo.close(t) + pythonFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py")) + defer pythonFoo.Close(t) + vanillaFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo")) + defer vanillaFoo.Close(t) files, err := fileSet.All() assert.NoError(t, err) change, err := state.diff(files) @@ -265,7 +212,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { assert.Contains(t, change.put, "foo") // errors out because they point to the same destination - pythonFoo.overwrite(t, "# Databricks notebook source\nprint(\"def\")") + pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") files, err = fileSet.All() assert.NoError(t, err) change, err = state.diff(files) @@ -317,9 +264,9 @@ func TestOldSnapshotInvalidation(t *testing.T) { snapshotPath, err := emptySnapshot.getPath(ctx) assert.NoError(t, err) - snapshotFile := createFile(t, snapshotPath) - snapshotFile.overwrite(t, oldVersionSnapshot) - snapshotFile.close(t) + snapshotFile := testfile.CreateFile(t, snapshotPath) + snapshotFile.Overwrite(t, oldVersionSnapshot) + snapshotFile.Close(t) assert.FileExists(t, snapshotPath) snapshot := emptySnapshot @@ -342,9 +289,9 @@ func TestNoVersionSnapshotInvalidation(t *testing.T) { snapshotPath, err := emptySnapshot.getPath(ctx) assert.NoError(t, err) - snapshotFile := createFile(t, snapshotPath) - snapshotFile.overwrite(t, noVersionSnapshot) - snapshotFile.close(t) + snapshotFile := testfile.CreateFile(t, snapshotPath) + snapshotFile.Overwrite(t, noVersionSnapshot) + snapshotFile.Close(t) assert.FileExists(t, snapshotPath) snapshot := emptySnapshot @@ -369,9 +316,9 @@ func TestLatestVersionSnapshotGetsLoaded(t *testing.T) { snapshotPath, err := emptySnapshot.getPath(ctx) assert.NoError(t, err) - snapshotFile := createFile(t, snapshotPath) - snapshotFile.overwrite(t, latestVersionSnapshot) - snapshotFile.close(t) + snapshotFile := testfile.CreateFile(t, snapshotPath) + snapshotFile.Overwrite(t, latestVersionSnapshot) + snapshotFile.Close(t) assert.FileExists(t, snapshotPath) snapshot := emptySnapshot diff --git a/internal/sync_test.go b/internal/sync_test.go index 6f0048f6..baa4c1a8 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -5,31 +5,28 @@ import ( "encoding/json" "fmt" "io" + "net/http" "os" "os/exec" + "path" "path/filepath" "strings" "testing" "time" "github.com/databricks/bricks/cmd/sync" + "github.com/databricks/bricks/libs/testfile" "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/service/repos" "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -// TODO: these tests are bloated, refactor these, and make write down tests for -// all edge cases with interop between files, directory and notebooks during syncing -// https://databricks.atlassian.net/browse/DECO-416 - // This test needs auth env vars to run. // Please run using the deco env test or deco env shell -func TestAccFullSync(t *testing.T) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) - - wsc := databricks.Must(databricks.NewWorkspaceClient()) - ctx := context.Background() +func setupRepo(t *testing.T, wsc *databricks.WorkspaceClient, ctx context.Context) (localRoot, remoteRoot string) { me, err := wsc.CurrentUser.Me(ctx) assert.NoError(t, err) repoUrl := "https://github.com/shreyas-goenka/empty-repo.git" @@ -54,305 +51,416 @@ func TestAccFullSync(t *testing.T) { err = cmd.Run() assert.NoError(t, err) - // Create amsterdam.txt file - projectDir := filepath.Join(tempDir, "empty-repo") - f, err := os.Create(filepath.Join(projectDir, "amsterdam.txt")) - assert.NoError(t, err) - defer f.Close() - - // Run `bricks sync` in the background. - t.Setenv("BRICKS_ROOT", projectDir) - c := NewCobraTestRunner(t, "sync", "--remote-path", repoPath, "--persist-snapshot=false") - c.RunBackground() - - // First upload assertion - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 3 - }, 30*time.Second, 5*time.Second) - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files1 []string - for _, v := range objects { - files1 = append(files1, filepath.Base(v.Path)) - } - assert.Len(t, files1, 3) - assert.Contains(t, files1, "amsterdam.txt") - assert.Contains(t, files1, ".gitkeep") - assert.Contains(t, files1, ".gitignore") - - // Create new files and assert - os.Create(filepath.Join(projectDir, "hello.txt")) - os.Create(filepath.Join(projectDir, "world.txt")) - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 5 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files2 []string - for _, v := range objects { - files2 = append(files2, filepath.Base(v.Path)) - } - assert.Len(t, files2, 5) - assert.Contains(t, files2, "amsterdam.txt") - assert.Contains(t, files2, ".gitkeep") - assert.Contains(t, files2, "hello.txt") - assert.Contains(t, files2, "world.txt") - assert.Contains(t, files2, ".gitignore") - - // delete a file and assert - os.Remove(filepath.Join(projectDir, "hello.txt")) - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 4 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files3 []string - for _, v := range objects { - files3 = append(files3, filepath.Base(v.Path)) - } - assert.Len(t, files3, 4) - assert.Contains(t, files3, "amsterdam.txt") - assert.Contains(t, files3, ".gitkeep") - assert.Contains(t, files3, "world.txt") - assert.Contains(t, files3, ".gitignore") + localRoot = filepath.Join(tempDir, "empty-repo") + remoteRoot = repoPath + return localRoot, remoteRoot } -func assertSnapshotContents(t *testing.T, host, repoPath, projectDir string, listOfSyncedFiles []string) { - snapshotPath := filepath.Join(projectDir, ".databricks/sync-snapshots", sync.GetFileName(host, repoPath)) - assert.FileExists(t, snapshotPath) +type assertSync struct { + t *testing.T + c *cobraTestRunner + w *databricks.WorkspaceClient + localRoot string + remoteRoot string +} + +func (a *assertSync) remoteDirContent(ctx context.Context, relativeDir string, expectedFiles []string) { + remoteDir := path.Join(a.remoteRoot, relativeDir) + a.c.Eventually(func() bool { + objects, err := a.w.Workspace.ListAll(ctx, workspace.List{ + Path: remoteDir, + }) + require.NoError(a.t, err) + return len(objects) == len(expectedFiles) + }, 30*time.Second, 5*time.Second) + objects, err := a.w.Workspace.ListAll(ctx, workspace.List{ + Path: remoteDir, + }) + require.NoError(a.t, err) + + var actualFiles []string + for _, v := range objects { + actualFiles = append(actualFiles, v.Path) + } + + assert.Len(a.t, actualFiles, len(expectedFiles)) + for _, v := range expectedFiles { + assert.Contains(a.t, actualFiles, path.Join(a.remoteRoot, relativeDir, v)) + } +} + +func (a *assertSync) remoteFileContent(ctx context.Context, relativePath string, expectedContent string) { + filePath := path.Join(a.remoteRoot, relativePath) + + // Remove leading "/" so we can use it in the URL. + urlPath := fmt.Sprintf( + "/api/2.0/workspace-files/%s", + strings.TrimLeft(filePath, "/"), + ) + + apiClient, err := client.New(a.w.Config) + require.NoError(a.t, err) + + var res []byte + a.c.Eventually(func() bool { + err = apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res) + require.NoError(a.t, err) + actualContent := string(res) + return actualContent == expectedContent + }, 30*time.Second, 5*time.Second) +} + +func (a *assertSync) objectType(ctx context.Context, relativePath string, expected string) { + path := path.Join(a.remoteRoot, relativePath) + + a.c.Eventually(func() bool { + metadata, err := a.w.Workspace.GetStatusByPath(ctx, path) + if err != nil { + return false + } + return metadata.ObjectType.String() == expected + }, 30*time.Second, 5*time.Second) +} + +func (a *assertSync) language(ctx context.Context, relativePath string, expected string) { + path := path.Join(a.remoteRoot, relativePath) + + a.c.Eventually(func() bool { + metadata, err := a.w.Workspace.GetStatusByPath(ctx, path) + if err != nil { + return false + } + return metadata.Language.String() == expected + }, 30*time.Second, 5*time.Second) +} + +func (a *assertSync) snapshotContains(files []string) { + snapshotPath := filepath.Join(a.localRoot, ".databricks/sync-snapshots", sync.GetFileName(a.w.Config.Host, a.remoteRoot)) + assert.FileExists(a.t, snapshotPath) var s *sync.Snapshot f, err := os.Open(snapshotPath) - assert.NoError(t, err) + assert.NoError(a.t, err) defer f.Close() bytes, err := io.ReadAll(f) - assert.NoError(t, err) + assert.NoError(a.t, err) err = json.Unmarshal(bytes, &s) - assert.NoError(t, err) + assert.NoError(a.t, err) - assert.Equal(t, s.Host, host) - assert.Equal(t, s.RemotePath, repoPath) - for _, filePath := range listOfSyncedFiles { + assert.Equal(a.t, s.Host, a.w.Config.Host) + assert.Equal(a.t, s.RemotePath, a.remoteRoot) + for _, filePath := range files { _, ok := s.LastUpdatedTimes[filePath] - assert.True(t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastUpdatedTimes)) + assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastUpdatedTimes)) } - assert.Equal(t, len(listOfSyncedFiles), len(s.LastUpdatedTimes)) + assert.Equal(a.t, len(files), len(s.LastUpdatedTimes)) } -func TestAccIncrementalSync(t *testing.T) { +func TestAccFullFileSync(t *testing.T) { t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) wsc := databricks.Must(databricks.NewWorkspaceClient()) ctx := context.Background() - me, err := wsc.CurrentUser.Me(ctx) - assert.NoError(t, err) - repoUrl := "https://github.com/shreyas-goenka/empty-repo.git" - repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("empty-repo-sync-integration-")) - repoInfo, err := wsc.Repos.Create(ctx, repos.CreateRepo{ - Path: repoPath, - Url: repoUrl, - Provider: "gitHub", - }) - assert.NoError(t, err) - - t.Cleanup(func() { - err := wsc.Repos.DeleteByRepoId(ctx, repoInfo.Id) - assert.NoError(t, err) - }) - - // clone public empty remote repo - tempDir := t.TempDir() - cmd := exec.Command("git", "clone", repoUrl) - cmd.Dir = tempDir - err = cmd.Run() - assert.NoError(t, err) - - projectDir := filepath.Join(tempDir, "empty-repo") - - // Add .databricks to .gitignore - content := []byte("/.databricks/") - f2, err := os.Create(filepath.Join(projectDir, ".gitignore")) - assert.NoError(t, err) - defer f2.Close() - _, err = f2.Write(content) - assert.NoError(t, err) + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) // Run `bricks sync` in the background. - t.Setenv("BRICKS_ROOT", projectDir) - c := NewCobraTestRunner(t, "sync", "--remote-path", repoPath, "--persist-snapshot=true") + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=false") c.RunBackground() - // First upload assertion - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 2 - }, 30*time.Second, 5*time.Second) - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files1 []string - for _, v := range objects { - files1 = append(files1, filepath.Base(v.Path)) + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, } - assert.Len(t, files1, 2) - assert.Contains(t, files1, ".gitignore") - assert.Contains(t, files1, ".gitkeep") - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{".gitkeep", ".gitignore"}) - // Create amsterdam.txt file - f, err := os.Create(filepath.Join(projectDir, "amsterdam.txt")) - assert.NoError(t, err) - defer f.Close() + // .gitkeep comes from cloning during repo setup + assertSync.remoteDirContent(ctx, "", []string{".gitkeep"}) - // new file upload assertion - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 3 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files2 []string - for _, v := range objects { - files2 = append(files2, filepath.Base(v.Path)) - } - assert.Len(t, files2, 3) - assert.Contains(t, files2, "amsterdam.txt") - assert.Contains(t, files2, ".gitkeep") - assert.Contains(t, files2, ".gitignore") - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitkeep", ".gitignore"}) + // New file + localFilePath := filepath.Join(localRepoPath, "foo.txt") + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + assertSync.remoteDirContent(ctx, "", []string{"foo.txt", ".gitkeep", ".gitignore"}) + assertSync.remoteFileContent(ctx, "foo.txt", "") - // delete a file and assert - os.Remove(filepath.Join(projectDir, ".gitkeep")) - c.Eventually(func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 2 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files3 []string - for _, v := range objects { - files3 = append(files3, filepath.Base(v.Path)) - } - assert.Len(t, files3, 2) - assert.Contains(t, files3, "amsterdam.txt") - assert.Contains(t, files3, ".gitignore") - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitignore"}) + // Write to file + f.Overwrite(t, `{"statement": "Mi Gente"}`) + assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Mi Gente"}`) - // new file in dir upload assertion - fooPath := filepath.Join(projectDir, "bar/foo.txt") - err = os.MkdirAll(filepath.Dir(fooPath), os.ModePerm) - assert.NoError(t, err) - f, err = os.Create(fooPath) - assert.NoError(t, err) - defer f.Close() - assert.Eventually(t, func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 3 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files4 []string - for _, v := range objects { - files4 = append(files4, filepath.Base(v.Path)) - } - assert.Len(t, files4, 3) - assert.Contains(t, files4, "amsterdam.txt") - assert.Contains(t, files4, ".gitignore") - assert.Contains(t, files4, "bar") - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", "bar/foo.txt", ".gitignore"}) + // Write again + f.Overwrite(t, `{"statement": "Young Dumb & Broke"}`) + assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Young Dumb & Broke"}`) - // delete dir - err = os.RemoveAll(filepath.Dir(fooPath)) - assert.NoError(t, err) - assert.Eventually(t, func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 3 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files5 []string - for _, v := range objects { - files5 = append(files5, filepath.Base(v.Path)) - if strings.Contains(v.Path, "bar") { - assert.Equal(t, workspace.ObjectType("DIRECTORY"), v.ObjectType) - } - } - assert.Len(t, files5, 3) - assert.Contains(t, files5, "bar") - assert.Contains(t, files5, "amsterdam.txt") - assert.Contains(t, files5, ".gitignore") - // workspace still contains `bar` directory but it has been deleted from snapshot - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitignore"}) - - // file called bar should overwrite the directory - err = os.WriteFile(filepath.Join(projectDir, "bar"), []byte("Kal ho na ho is a cool movie"), os.ModePerm) - assert.NoError(t, err) - assert.Eventually(t, func() bool { - objects, err := wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - return len(objects) == 3 - }, 30*time.Second, 5*time.Second) - objects, err = wsc.Workspace.ListAll(ctx, workspace.List{ - Path: repoPath, - }) - assert.NoError(t, err) - var files6 []string - for _, v := range objects { - files6 = append(files6, filepath.Base(v.Path)) - if strings.Contains(v.Path, "bar") { - assert.Equal(t, workspace.ObjectType("FILE"), v.ObjectType) - } - } - assert.Len(t, files6, 3) - assert.Contains(t, files6, "amsterdam.txt") - assert.Contains(t, files6, ".gitignore") - // workspace still contains `bar` directory but it has been deleted from snapshot - assert.Contains(t, files6, "bar") - assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", "bar", ".gitignore"}) + // delete + f.Remove(t) + assertSync.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"}) +} + +func TestAccIncrementalFileSync(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // .gitkeep comes from cloning during repo setup + assertSync.remoteDirContent(ctx, "", []string{".gitkeep"}) + + // New file + localFilePath := filepath.Join(localRepoPath, "foo.txt") + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + assertSync.remoteDirContent(ctx, "", []string{"foo.txt", ".gitkeep", ".gitignore"}) + assertSync.remoteFileContent(ctx, "foo.txt", "") + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo.txt"}) + + // Write to file + f.Overwrite(t, `{"statement": "Mi Gente"}`) + assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Mi Gente"}`) + + // Write again + f.Overwrite(t, `{"statement": "Young Dumb & Broke"}`) + assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Young Dumb & Broke"}`) + + // delete + f.Remove(t) + assertSync.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore"}) +} + +func TestAccNestedFolderSync(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // .gitkeep comes from cloning during repo setup + assertSync.remoteDirContent(ctx, "/", []string{".gitkeep"}) + + // New file + localFilePath := filepath.Join(localRepoPath, "dir1/dir2/dir3/foo.txt") + err := os.MkdirAll(filepath.Dir(localFilePath), 0o755) + assert.NoError(t, err) + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + assertSync.remoteDirContent(ctx, "", []string{"dir1", ".gitkeep", ".gitignore"}) + assertSync.remoteDirContent(ctx, "dir1", []string{"dir2"}) + assertSync.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"}) + assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", filepath.FromSlash("dir1/dir2/dir3/foo.txt")}) + + // delete + f.Remove(t) + // directories are not cleaned up right now. This is not ideal + assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore"}) +} + +// sync does not clean up empty directories from the workspace file system. +// This is a check for the edge case when a user does the following: +// +// 1. Add file foo/bar.txt +// 2. Delete foo/bar.txt (including the directory) +// 3. Add file foo +// +// In the above scenario sync should delete the empty folder and add foo to the remote +// file system +func TestAccIncrementalFileOverwritesFolder(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // create foo/bar.txt + localFilePath := filepath.Join(localRepoPath, "foo/bar.txt") + err := os.MkdirAll(filepath.Dir(localFilePath), 0o755) + assert.NoError(t, err) + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"}) + assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", filepath.FromSlash("foo/bar.txt")}) + + // delete foo/bar.txt + f.Remove(t) + os.Remove(filepath.Join(localRepoPath, "foo")) + assertSync.remoteDirContent(ctx, "foo", []string{}) + assertSync.objectType(ctx, "foo", "DIRECTORY") + assertSync.snapshotContains([]string{".gitkeep", ".gitignore"}) + + f2 := testfile.CreateFile(t, filepath.Join(localRepoPath, "foo")) + defer f2.Close(t) + assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"}) + assertSync.objectType(ctx, "foo", "FILE") + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo"}) +} + +func TestAccIncrementalSyncPythonNotebookToFile(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // create python notebook + localFilePath := filepath.Join(localRepoPath, "foo.py") + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + f.Overwrite(t, "# Databricks notebook source") + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // notebook was uploaded properly + assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"}) + assertSync.objectType(ctx, "foo", "NOTEBOOK") + assertSync.language(ctx, "foo", "PYTHON") + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"}) + + // convert to vanilla python file + f.Overwrite(t, "# No longer a python notebook") + assertSync.objectType(ctx, "foo.py", "FILE") + assertSync.remoteDirContent(ctx, "", []string{"foo.py", ".gitkeep", ".gitignore"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"}) + + // delete the vanilla python file + f.Remove(t) + assertSync.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore"}) +} + +func TestAccIncrementalSyncFileToPythonNotebook(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // create vanilla python file + localFilePath := filepath.Join(localRepoPath, "foo.py") + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + + // assert file upload + assertSync.remoteDirContent(ctx, "", []string{"foo.py", ".gitkeep", ".gitignore"}) + assertSync.objectType(ctx, "foo.py", "FILE") + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"}) + + // convert to notebook + f.Overwrite(t, "# Databricks notebook source") + assertSync.objectType(ctx, "foo", "NOTEBOOK") + assertSync.language(ctx, "foo", "PYTHON") + assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"}) + assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"}) +} + +func TestAccIncrementalSyncPythonNotebookDelete(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // create python notebook + localFilePath := filepath.Join(localRepoPath, "foo.py") + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + f.Overwrite(t, "# Databricks notebook source") + + // Run `bricks sync` in the background. + t.Setenv("BRICKS_ROOT", localRepoPath) + c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // notebook was uploaded properly + assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"}) + assertSync.objectType(ctx, "foo", "NOTEBOOK") + assertSync.language(ctx, "foo", "PYTHON") + + // Delete notebook + f.Remove(t) + assertSync.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"}) } diff --git a/libs/testfile/testfile.go b/libs/testfile/testfile.go new file mode 100644 index 00000000..c3b29f38 --- /dev/null +++ b/libs/testfile/testfile.go @@ -0,0 +1,64 @@ +package testfile + +import ( + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +// Use this struct to work with files in a github actions test environment +type TestFile struct { + mtime time.Time + fd *os.File + path string + // to make close idempotent + isOpen bool +} + +func CreateFile(t *testing.T, path string) *TestFile { + f, err := os.Create(path) + assert.NoError(t, err) + + fileInfo, err := os.Stat(path) + assert.NoError(t, err) + + return &TestFile{ + path: path, + fd: f, + mtime: fileInfo.ModTime(), + isOpen: true, + } +} + +func (f *TestFile) Close(t *testing.T) { + if f.isOpen { + err := f.fd.Close() + assert.NoError(t, err) + f.isOpen = false + } +} + +func (f *TestFile) Overwrite(t *testing.T, s string) { + err := os.Truncate(f.path, 0) + assert.NoError(t, err) + + _, err = f.fd.Seek(0, 0) + assert.NoError(t, err) + + _, err = f.fd.WriteString(s) + assert.NoError(t, err) + + // We manually update mtime after write because github actions file + // system does not :') + err = os.Chtimes(f.path, f.mtime.Add(time.Minute), f.mtime.Add(time.Minute)) + assert.NoError(t, err) + f.mtime = f.mtime.Add(time.Minute) +} + +func (f *TestFile) Remove(t *testing.T) { + f.Close(t) + err := os.Remove(f.path) + assert.NoError(t, err) +}