Refactor and cover edge cases in sync integration tests (#160)

This PR:
1. Refactors the sync integration tests to make them more readable
2. Adds additional tests for edge cases we encountered during vscode
runs
3. Intentional side effect: sync integration tests are also green on
windows (see
https://github.com/databricks/eng-dev-ecosystem/actions/runs/3817365642/jobs/6493576727)

Change in coverage

- We now test for python notebook <-> python file interconversion and
python notebook deletion being synced to workspace
- Tests are split up and are more focused on testing specific edge cases
This commit is contained in:
shreyas-goenka 2023-01-10 13:16:30 +01:00 committed by GitHub
parent b87b4b0f40
commit 0d9ecb5643
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 484 additions and 365 deletions

View File

@ -9,64 +9,11 @@ import (
"time"
"github.com/databricks/bricks/git"
"github.com/databricks/bricks/libs/testfile"
"github.com/databricks/bricks/project"
"github.com/stretchr/testify/assert"
)
type testFile struct {
mtime time.Time
fd *os.File
path string
// to make close idempotent
isOpen bool
}
func createFile(t *testing.T, path string) *testFile {
f, err := os.Create(path)
assert.NoError(t, err)
fileInfo, err := os.Stat(path)
assert.NoError(t, err)
return &testFile{
path: path,
fd: f,
mtime: fileInfo.ModTime(),
isOpen: true,
}
}
func (f *testFile) close(t *testing.T) {
if f.isOpen {
err := f.fd.Close()
assert.NoError(t, err)
f.isOpen = false
}
}
func (f *testFile) overwrite(t *testing.T, s string) {
err := os.Truncate(f.path, 0)
assert.NoError(t, err)
_, err = f.fd.Seek(0, 0)
assert.NoError(t, err)
_, err = f.fd.WriteString(s)
assert.NoError(t, err)
// We manually update mtime after write because github actions file
// system does not :')
err = os.Chtimes(f.path, f.mtime.Add(time.Minute), f.mtime.Add(time.Minute))
assert.NoError(t, err)
f.mtime = f.mtime.Add(time.Minute)
}
func (f *testFile) remove(t *testing.T) {
f.close(t)
err := os.Remove(f.path)
assert.NoError(t, err)
}
func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string) {
keys := make([]string, len(m))
i := 0
@ -87,11 +34,11 @@ func TestDiff(t *testing.T) {
RemoteToLocalNames: make(map[string]string),
}
f1 := createFile(t, filepath.Join(projectDir, "hello.txt"))
defer f1.close(t)
f1 := testfile.CreateFile(t, filepath.Join(projectDir, "hello.txt"))
defer f1.Close(t)
worldFilePath := filepath.Join(projectDir, "world.txt")
f2 := createFile(t, worldFilePath)
defer f2.close(t)
f2 := testfile.CreateFile(t, worldFilePath)
defer f2.Close(t)
// New files are put
files, err := fileSet.All()
@ -107,7 +54,7 @@ func TestDiff(t *testing.T) {
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames)
// world.txt is edited
f2.overwrite(t, "bunnies are cute.")
f2.Overwrite(t, "bunnies are cute.")
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
@ -122,7 +69,7 @@ func TestDiff(t *testing.T) {
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames)
// hello.txt is deleted
f1.remove(t)
f1.Remove(t)
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
@ -148,9 +95,9 @@ func TestFolderDiff(t *testing.T) {
err := os.Mkdir(filepath.Join(projectDir, "foo"), os.ModePerm)
assert.NoError(t, err)
f1 := createFile(t, filepath.Join(projectDir, "foo", "bar.py"))
defer f1.close(t)
f1.overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
f1 := testfile.CreateFile(t, filepath.Join(projectDir, "foo", "bar.py"))
defer f1.Close(t)
f1.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
files, err := fileSet.All()
assert.NoError(t, err)
@ -160,7 +107,7 @@ func TestFolderDiff(t *testing.T) {
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "foo/bar.py")
f1.remove(t)
f1.Remove(t)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
@ -180,13 +127,13 @@ func TestPythonNotebookDiff(t *testing.T) {
RemoteToLocalNames: make(map[string]string),
}
foo := createFile(t, filepath.Join(projectDir, "foo.py"))
defer foo.close(t)
foo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py"))
defer foo.Close(t)
// Case 1: notebook foo.py is uploaded
files, err := fileSet.All()
assert.NoError(t, err)
foo.overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
change, err := state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
@ -198,7 +145,7 @@ func TestPythonNotebookDiff(t *testing.T) {
// Case 2: notebook foo.py is converted to python script by removing
// magic keyword
foo.overwrite(t, "print(\"abc\")")
foo.Overwrite(t, "print(\"abc\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
@ -212,7 +159,7 @@ func TestPythonNotebookDiff(t *testing.T) {
assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.RemoteToLocalNames)
// Case 3: Python script foo.py is converted to a databricks notebook
foo.overwrite(t, "# Databricks notebook source\nprint(\"def\")")
foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
@ -226,7 +173,7 @@ func TestPythonNotebookDiff(t *testing.T) {
assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames)
// Case 4: Python notebook foo.py is deleted, and its remote name is used in change.delete
foo.remove(t)
foo.Remove(t)
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
@ -251,10 +198,10 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
}
// upload should work since they point to different destinations
pythonFoo := createFile(t, filepath.Join(projectDir, "foo.py"))
defer pythonFoo.close(t)
vanillaFoo := createFile(t, filepath.Join(projectDir, "foo"))
defer vanillaFoo.close(t)
pythonFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py"))
defer pythonFoo.Close(t)
vanillaFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo"))
defer vanillaFoo.Close(t)
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
@ -265,7 +212,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
assert.Contains(t, change.put, "foo")
// errors out because they point to the same destination
pythonFoo.overwrite(t, "# Databricks notebook source\nprint(\"def\")")
pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
@ -317,9 +264,9 @@ func TestOldSnapshotInvalidation(t *testing.T) {
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, oldVersionSnapshot)
snapshotFile.close(t)
snapshotFile := testfile.CreateFile(t, snapshotPath)
snapshotFile.Overwrite(t, oldVersionSnapshot)
snapshotFile.Close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot
@ -342,9 +289,9 @@ func TestNoVersionSnapshotInvalidation(t *testing.T) {
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, noVersionSnapshot)
snapshotFile.close(t)
snapshotFile := testfile.CreateFile(t, snapshotPath)
snapshotFile.Overwrite(t, noVersionSnapshot)
snapshotFile.Close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot
@ -369,9 +316,9 @@ func TestLatestVersionSnapshotGetsLoaded(t *testing.T) {
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, latestVersionSnapshot)
snapshotFile.close(t)
snapshotFile := testfile.CreateFile(t, snapshotPath)
snapshotFile.Overwrite(t, latestVersionSnapshot)
snapshotFile.Close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot

View File

@ -5,31 +5,28 @@ import (
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"testing"
"time"
"github.com/databricks/bricks/cmd/sync"
"github.com/databricks/bricks/libs/testfile"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/client"
"github.com/databricks/databricks-sdk-go/service/repos"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TODO: these tests are bloated; refactor them and write tests for
// all edge cases with interop between files, directories and notebooks during syncing
// https://databricks.atlassian.net/browse/DECO-416
// This test needs auth env vars to run.
// Please run using the deco env test or deco env shell
func TestAccFullSync(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background()
func setupRepo(t *testing.T, wsc *databricks.WorkspaceClient, ctx context.Context) (localRoot, remoteRoot string) {
me, err := wsc.CurrentUser.Me(ctx)
assert.NoError(t, err)
repoUrl := "https://github.com/shreyas-goenka/empty-repo.git"
@ -54,305 +51,416 @@ func TestAccFullSync(t *testing.T) {
err = cmd.Run()
assert.NoError(t, err)
// Create amsterdam.txt file
projectDir := filepath.Join(tempDir, "empty-repo")
f, err := os.Create(filepath.Join(projectDir, "amsterdam.txt"))
assert.NoError(t, err)
defer f.Close()
// Run `bricks sync` in the background.
t.Setenv("BRICKS_ROOT", projectDir)
c := NewCobraTestRunner(t, "sync", "--remote-path", repoPath, "--persist-snapshot=false")
c.RunBackground()
// First upload assertion
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 3
}, 30*time.Second, 5*time.Second)
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files1 []string
for _, v := range objects {
files1 = append(files1, filepath.Base(v.Path))
}
assert.Len(t, files1, 3)
assert.Contains(t, files1, "amsterdam.txt")
assert.Contains(t, files1, ".gitkeep")
assert.Contains(t, files1, ".gitignore")
// Create new files and assert
os.Create(filepath.Join(projectDir, "hello.txt"))
os.Create(filepath.Join(projectDir, "world.txt"))
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 5
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files2 []string
for _, v := range objects {
files2 = append(files2, filepath.Base(v.Path))
}
assert.Len(t, files2, 5)
assert.Contains(t, files2, "amsterdam.txt")
assert.Contains(t, files2, ".gitkeep")
assert.Contains(t, files2, "hello.txt")
assert.Contains(t, files2, "world.txt")
assert.Contains(t, files2, ".gitignore")
// delete a file and assert
os.Remove(filepath.Join(projectDir, "hello.txt"))
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 4
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files3 []string
for _, v := range objects {
files3 = append(files3, filepath.Base(v.Path))
}
assert.Len(t, files3, 4)
assert.Contains(t, files3, "amsterdam.txt")
assert.Contains(t, files3, ".gitkeep")
assert.Contains(t, files3, "world.txt")
assert.Contains(t, files3, ".gitignore")
localRoot = filepath.Join(tempDir, "empty-repo")
remoteRoot = repoPath
return localRoot, remoteRoot
}
func assertSnapshotContents(t *testing.T, host, repoPath, projectDir string, listOfSyncedFiles []string) {
snapshotPath := filepath.Join(projectDir, ".databricks/sync-snapshots", sync.GetFileName(host, repoPath))
assert.FileExists(t, snapshotPath)
// assertSync bundles everything the sync assertions below need: the test
// handle, the background command runner, a workspace client, and the local and
// remote roots that relative paths are resolved against.
type assertSync struct {
// t receives all assertion failures.
t *testing.T
// c is the runner whose Eventually method is used to poll for remote state.
c *cobraTestRunner
// w is the workspace client used to list and inspect remote objects.
w *databricks.WorkspaceClient
// localRoot is the local project directory; the snapshot file is looked up under it.
localRoot string
// remoteRoot is the remote path that every relative path argument is joined to.
remoteRoot string
}
// remoteDirContent polls the remote directory relativeDir (resolved against
// a.remoteRoot) until it lists as many objects as expectedFiles has entries,
// then lists it once more and asserts that every expected name is present.
//
// NOTE(review): the 30s / 5s arguments are presumably the overall timeout and
// the polling interval of cobraTestRunner.Eventually — confirm against its
// definition.
func (a *assertSync) remoteDirContent(ctx context.Context, relativeDir string, expectedFiles []string) {
	remoteDir := path.Join(a.remoteRoot, relativeDir)
	listing := workspace.List{Path: remoteDir}
	wantCount := len(expectedFiles)

	a.c.Eventually(func() bool {
		found, listErr := a.w.Workspace.ListAll(ctx, listing)
		require.NoError(a.t, listErr)
		return len(found) == wantCount
	}, 30*time.Second, 5*time.Second)

	// Re-list after the wait so the assertions run against fresh data.
	found, err := a.w.Workspace.ListAll(ctx, listing)
	require.NoError(a.t, err)

	var remotePaths []string
	for _, obj := range found {
		remotePaths = append(remotePaths, obj.Path)
	}

	assert.Len(a.t, remotePaths, wantCount)
	for _, name := range expectedFiles {
		// Remote objects are reported by absolute path, so compare against
		// the absolute form of each expected name.
		assert.Contains(a.t, remotePaths, path.Join(a.remoteRoot, relativeDir, name))
	}
}
// remoteFileContent polls the workspace-files API for the file at
// relativePath (resolved against a.remoteRoot) until the returned bytes,
// read as a string, equal expectedContent.
//
// Any error from the GET request fails the test immediately rather than
// being retried.
func (a *assertSync) remoteFileContent(ctx context.Context, relativePath string, expectedContent string) {
	remoteFile := path.Join(a.remoteRoot, relativePath)

	// The endpoint template already ends in "/", so leading slashes are
	// stripped from the workspace path before it is appended.
	trimmed := strings.TrimLeft(remoteFile, "/")
	urlPath := fmt.Sprintf("/api/2.0/workspace-files/%s", trimmed)

	apiClient, err := client.New(a.w.Config)
	require.NoError(a.t, err)

	// Declared outside the closure so the same buffer is handed to every poll.
	var res []byte
	a.c.Eventually(func() bool {
		getErr := apiClient.Do(ctx, http.MethodGet, urlPath, nil, &res)
		require.NoError(a.t, getErr)
		return string(res) == expectedContent
	}, 30*time.Second, 5*time.Second)
}
// objectType polls until the remote object at relativePath (resolved against
// a.remoteRoot) reports an object type whose string form equals expected.
// A failed status lookup counts as "not yet" and is retried.
func (a *assertSync) objectType(ctx context.Context, relativePath string, expected string) {
	remotePath := path.Join(a.remoteRoot, relativePath)

	a.c.Eventually(func() bool {
		info, err := a.w.Workspace.GetStatusByPath(ctx, remotePath)
		// Short-circuit keeps info untouched when the lookup failed.
		return err == nil && info.ObjectType.String() == expected
	}, 30*time.Second, 5*time.Second)
}
// language polls until the remote object at relativePath (resolved against
// a.remoteRoot) reports a language whose string form equals expected.
// A failed status lookup counts as "not yet" and is retried.
func (a *assertSync) language(ctx context.Context, relativePath string, expected string) {
	remotePath := path.Join(a.remoteRoot, relativePath)

	a.c.Eventually(func() bool {
		info, err := a.w.Workspace.GetStatusByPath(ctx, remotePath)
		// Short-circuit keeps info untouched when the lookup failed.
		return err == nil && info.Language.String() == expected
	}, 30*time.Second, 5*time.Second)
}
func (a *assertSync) snapshotContains(files []string) {
snapshotPath := filepath.Join(a.localRoot, ".databricks/sync-snapshots", sync.GetFileName(a.w.Config.Host, a.remoteRoot))
assert.FileExists(a.t, snapshotPath)
var s *sync.Snapshot
f, err := os.Open(snapshotPath)
assert.NoError(t, err)
assert.NoError(a.t, err)
defer f.Close()
bytes, err := io.ReadAll(f)
assert.NoError(t, err)
assert.NoError(a.t, err)
err = json.Unmarshal(bytes, &s)
assert.NoError(t, err)
assert.NoError(a.t, err)
assert.Equal(t, s.Host, host)
assert.Equal(t, s.RemotePath, repoPath)
for _, filePath := range listOfSyncedFiles {
assert.Equal(a.t, s.Host, a.w.Config.Host)
assert.Equal(a.t, s.RemotePath, a.remoteRoot)
for _, filePath := range files {
_, ok := s.LastUpdatedTimes[filePath]
assert.True(t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastUpdatedTimes))
assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastUpdatedTimes))
}
assert.Equal(t, len(listOfSyncedFiles), len(s.LastUpdatedTimes))
assert.Equal(a.t, len(files), len(s.LastUpdatedTimes))
}
func TestAccIncrementalSync(t *testing.T) {
func TestAccFullFileSync(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background()
me, err := wsc.CurrentUser.Me(ctx)
assert.NoError(t, err)
repoUrl := "https://github.com/shreyas-goenka/empty-repo.git"
repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("empty-repo-sync-integration-"))
repoInfo, err := wsc.Repos.Create(ctx, repos.CreateRepo{
Path: repoPath,
Url: repoUrl,
Provider: "gitHub",
})
assert.NoError(t, err)
t.Cleanup(func() {
err := wsc.Repos.DeleteByRepoId(ctx, repoInfo.Id)
assert.NoError(t, err)
})
// clone public empty remote repo
tempDir := t.TempDir()
cmd := exec.Command("git", "clone", repoUrl)
cmd.Dir = tempDir
err = cmd.Run()
assert.NoError(t, err)
projectDir := filepath.Join(tempDir, "empty-repo")
// Add .databricks to .gitignore
content := []byte("/.databricks/")
f2, err := os.Create(filepath.Join(projectDir, ".gitignore"))
assert.NoError(t, err)
defer f2.Close()
_, err = f2.Write(content)
assert.NoError(t, err)
localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx)
// Run `bricks sync` in the background.
t.Setenv("BRICKS_ROOT", projectDir)
c := NewCobraTestRunner(t, "sync", "--remote-path", repoPath, "--persist-snapshot=true")
t.Setenv("BRICKS_ROOT", localRepoPath)
c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=false")
c.RunBackground()
// First upload assertion
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 2
}, 30*time.Second, 5*time.Second)
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files1 []string
for _, v := range objects {
files1 = append(files1, filepath.Base(v.Path))
assertSync := assertSync{
t: t,
c: c,
w: wsc,
localRoot: localRepoPath,
remoteRoot: remoteRepoPath,
}
assert.Len(t, files1, 2)
assert.Contains(t, files1, ".gitignore")
assert.Contains(t, files1, ".gitkeep")
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{".gitkeep", ".gitignore"})
// Create amsterdam.txt file
f, err := os.Create(filepath.Join(projectDir, "amsterdam.txt"))
assert.NoError(t, err)
defer f.Close()
// .gitkeep comes from cloning during repo setup
assertSync.remoteDirContent(ctx, "", []string{".gitkeep"})
// new file upload assertion
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 3
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files2 []string
for _, v := range objects {
files2 = append(files2, filepath.Base(v.Path))
}
assert.Len(t, files2, 3)
assert.Contains(t, files2, "amsterdam.txt")
assert.Contains(t, files2, ".gitkeep")
assert.Contains(t, files2, ".gitignore")
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitkeep", ".gitignore"})
// New file
localFilePath := filepath.Join(localRepoPath, "foo.txt")
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.remoteDirContent(ctx, "", []string{"foo.txt", ".gitkeep", ".gitignore"})
assertSync.remoteFileContent(ctx, "foo.txt", "")
// delete a file and assert
os.Remove(filepath.Join(projectDir, ".gitkeep"))
c.Eventually(func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 2
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files3 []string
for _, v := range objects {
files3 = append(files3, filepath.Base(v.Path))
}
assert.Len(t, files3, 2)
assert.Contains(t, files3, "amsterdam.txt")
assert.Contains(t, files3, ".gitignore")
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitignore"})
// Write to file
f.Overwrite(t, `{"statement": "Mi Gente"}`)
assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Mi Gente"}`)
// new file in dir upload assertion
fooPath := filepath.Join(projectDir, "bar/foo.txt")
err = os.MkdirAll(filepath.Dir(fooPath), os.ModePerm)
assert.NoError(t, err)
f, err = os.Create(fooPath)
assert.NoError(t, err)
defer f.Close()
assert.Eventually(t, func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 3
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files4 []string
for _, v := range objects {
files4 = append(files4, filepath.Base(v.Path))
}
assert.Len(t, files4, 3)
assert.Contains(t, files4, "amsterdam.txt")
assert.Contains(t, files4, ".gitignore")
assert.Contains(t, files4, "bar")
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", "bar/foo.txt", ".gitignore"})
// Write again
f.Overwrite(t, `{"statement": "Young Dumb & Broke"}`)
assertSync.remoteFileContent(ctx, "foo.txt", `{"statement": "Young Dumb & Broke"}`)
// delete dir
err = os.RemoveAll(filepath.Dir(fooPath))
assert.NoError(t, err)
assert.Eventually(t, func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 3
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files5 []string
for _, v := range objects {
files5 = append(files5, filepath.Base(v.Path))
if strings.Contains(v.Path, "bar") {
assert.Equal(t, workspace.ObjectType("DIRECTORY"), v.ObjectType)
}
}
assert.Len(t, files5, 3)
assert.Contains(t, files5, "bar")
assert.Contains(t, files5, "amsterdam.txt")
assert.Contains(t, files5, ".gitignore")
// workspace still contains `bar` directory but it has been deleted from snapshot
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", ".gitignore"})
// file called bar should overwrite the directory
err = os.WriteFile(filepath.Join(projectDir, "bar"), []byte("Kal ho na ho is a cool movie"), os.ModePerm)
assert.NoError(t, err)
assert.Eventually(t, func() bool {
objects, err := wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
return len(objects) == 3
}, 30*time.Second, 5*time.Second)
objects, err = wsc.Workspace.ListAll(ctx, workspace.List{
Path: repoPath,
})
assert.NoError(t, err)
var files6 []string
for _, v := range objects {
files6 = append(files6, filepath.Base(v.Path))
if strings.Contains(v.Path, "bar") {
assert.Equal(t, workspace.ObjectType("FILE"), v.ObjectType)
}
}
assert.Len(t, files6, 3)
assert.Contains(t, files6, "amsterdam.txt")
assert.Contains(t, files6, ".gitignore")
// workspace still contains `bar` directory but it has been deleted from snapshot
assert.Contains(t, files6, "bar")
assertSnapshotContents(t, wsc.Config.Host, repoPath, projectDir, []string{"amsterdam.txt", "bar", ".gitignore"})
// delete
f.Remove(t)
assertSync.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"})
}
// TestAccIncrementalFileSync runs `bricks sync` with snapshot persistence
// enabled and takes a single file through create, two overwrites and delete,
// checking the remote listing, the remote content and the local snapshot.
func TestAccIncrementalFileSync(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRoot, remoteRoot := setupRepo(t, w, ctx)

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRoot)
	runner := NewCobraTestRunner(t, "sync", "--remote-path", remoteRoot, "--persist-snapshot=true")
	runner.RunBackground()

	check := assertSync{
		t:          t,
		c:          runner,
		w:          w,
		localRoot:  localRoot,
		remoteRoot: remoteRoot,
	}

	// .gitkeep comes from cloning during repo setup
	check.remoteDirContent(ctx, "", []string{".gitkeep"})

	// Step 1: a brand-new, empty file shows up remotely and in the snapshot.
	fooFile := testfile.CreateFile(t, filepath.Join(localRoot, "foo.txt"))
	defer fooFile.Close(t)
	check.remoteDirContent(ctx, "", []string{"foo.txt", ".gitkeep", ".gitignore"})
	check.remoteFileContent(ctx, "foo.txt", "")
	check.snapshotContains([]string{".gitkeep", ".gitignore", "foo.txt"})

	// Step 2: the first overwrite is propagated.
	fooFile.Overwrite(t, `{"statement": "Mi Gente"}`)
	check.remoteFileContent(ctx, "foo.txt", `{"statement": "Mi Gente"}`)

	// Step 3: a second overwrite is propagated as well.
	fooFile.Overwrite(t, `{"statement": "Young Dumb & Broke"}`)
	check.remoteFileContent(ctx, "foo.txt", `{"statement": "Young Dumb & Broke"}`)

	// Step 4: deleting the file removes it remotely and from the snapshot.
	fooFile.Remove(t)
	check.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"})
	check.snapshotContains([]string{".gitkeep", ".gitignore"})
}
// TestAccNestedFolderSync checks that a file created three directories deep
// is synced together with its parent directories, and that deleting the file
// empties the innermost remote directory and drops it from the snapshot.
func TestAccNestedFolderSync(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRoot, remoteRoot := setupRepo(t, w, ctx)

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRoot)
	runner := NewCobraTestRunner(t, "sync", "--remote-path", remoteRoot, "--persist-snapshot=true")
	runner.RunBackground()

	check := assertSync{
		t:          t,
		c:          runner,
		w:          w,
		localRoot:  localRoot,
		remoteRoot: remoteRoot,
	}

	// .gitkeep comes from cloning during repo setup
	check.remoteDirContent(ctx, "/", []string{".gitkeep"})

	// Create the nested file, making its parent directories first.
	nestedPath := filepath.Join(localRoot, "dir1/dir2/dir3/foo.txt")
	err := os.MkdirAll(filepath.Dir(nestedPath), 0o755)
	assert.NoError(t, err)
	nestedFile := testfile.CreateFile(t, nestedPath)
	defer nestedFile.Close(t)

	// Every level of the hierarchy must exist remotely.
	check.remoteDirContent(ctx, "", []string{"dir1", ".gitkeep", ".gitignore"})
	check.remoteDirContent(ctx, "dir1", []string{"dir2"})
	check.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"})
	check.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"})
	// Snapshot keys use the OS path separator, hence FromSlash.
	check.snapshotContains([]string{".gitkeep", ".gitignore", filepath.FromSlash("dir1/dir2/dir3/foo.txt")})

	// Delete the file again.
	nestedFile.Remove(t)
	// directories are not cleaned up right now. This is not ideal
	check.remoteDirContent(ctx, "dir1/dir2/dir3", []string{})
	check.snapshotContains([]string{".gitkeep", ".gitignore"})
}
// sync does not clean up empty directories from the workspace file system.
// This is a check for the edge case when a user does the following:
//
// 1. Add file foo/bar.txt
// 2. Delete foo/bar.txt (including the directory)
// 3. Add file foo
//
// In the above scenario sync should delete the empty folder and add foo to the remote
// file system
func TestAccIncrementalFileOverwritesFolder(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	wsc := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx)

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRepoPath)
	c := NewCobraTestRunner(t, "sync", "--remote-path", remoteRepoPath, "--persist-snapshot=true")
	c.RunBackground()

	assertSync := assertSync{
		t:          t,
		c:          c,
		w:          wsc,
		localRoot:  localRepoPath,
		remoteRoot: remoteRepoPath,
	}

	// create foo/bar.txt
	localFilePath := filepath.Join(localRepoPath, "foo/bar.txt")
	err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
	assert.NoError(t, err)
	f := testfile.CreateFile(t, localFilePath)
	defer f.Close(t)
	assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"})
	assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"})
	assertSync.snapshotContains([]string{".gitkeep", ".gitignore", filepath.FromSlash("foo/bar.txt")})

	// delete foo/bar.txt
	f.Remove(t)
	// Also remove the now-empty local directory. The error must not be
	// dropped: if the directory survives, creating the file "foo" below
	// fails for a reason that has nothing to do with sync.
	err = os.Remove(filepath.Join(localRepoPath, "foo"))
	require.NoError(t, err)
	assertSync.remoteDirContent(ctx, "foo", []string{})
	// The empty directory is still present remotely at this point.
	assertSync.objectType(ctx, "foo", "DIRECTORY")
	assertSync.snapshotContains([]string{".gitkeep", ".gitignore"})

	// A local file named foo must replace the leftover remote directory.
	f2 := testfile.CreateFile(t, filepath.Join(localRepoPath, "foo"))
	defer f2.Close(t)
	assertSync.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"})
	assertSync.objectType(ctx, "foo", "FILE")
	assertSync.snapshotContains([]string{".gitkeep", ".gitignore", "foo"})
}
// TestAccIncrementalSyncPythonNotebookToFile starts from a local foo.py that
// carries the "# Databricks notebook source" header, verifies it is synced as
// a Python notebook named foo, then rewrites it without the header and
// verifies it becomes a plain file foo.py, and finally deletes it.
func TestAccIncrementalSyncPythonNotebookToFile(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRoot, remoteRoot := setupRepo(t, w, ctx)

	// The notebook exists on disk before sync is started.
	notebook := testfile.CreateFile(t, filepath.Join(localRoot, "foo.py"))
	defer notebook.Close(t)
	notebook.Overwrite(t, "# Databricks notebook source")

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRoot)
	runner := NewCobraTestRunner(t, "sync", "--remote-path", remoteRoot, "--persist-snapshot=true")
	runner.RunBackground()

	check := assertSync{
		t:          t,
		c:          runner,
		w:          w,
		localRoot:  localRoot,
		remoteRoot: remoteRoot,
	}

	// Uploaded as a notebook: the remote name has no .py extension, while
	// the snapshot keeps the local name.
	check.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"})
	check.objectType(ctx, "foo", "NOTEBOOK")
	check.language(ctx, "foo", "PYTHON")
	check.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"})

	// Dropping the header turns it into a vanilla python file.
	notebook.Overwrite(t, "# No longer a python notebook")
	check.objectType(ctx, "foo.py", "FILE")
	check.remoteDirContent(ctx, "", []string{"foo.py", ".gitkeep", ".gitignore"})
	check.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"})

	// Deleting the vanilla python file removes it everywhere.
	notebook.Remove(t)
	check.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"})
	check.snapshotContains([]string{".gitkeep", ".gitignore"})
}
// TestAccIncrementalSyncFileToPythonNotebook creates an empty foo.py while
// sync is running, verifies it is uploaded as a plain file, then adds the
// "# Databricks notebook source" header and verifies the remote object
// becomes a Python notebook named foo.
func TestAccIncrementalSyncFileToPythonNotebook(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRoot, remoteRoot := setupRepo(t, w, ctx)

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRoot)
	runner := NewCobraTestRunner(t, "sync", "--remote-path", remoteRoot, "--persist-snapshot=true")
	runner.RunBackground()

	check := assertSync{
		t:          t,
		c:          runner,
		w:          w,
		localRoot:  localRoot,
		remoteRoot: remoteRoot,
	}

	// Start with a vanilla (empty) python file.
	script := testfile.CreateFile(t, filepath.Join(localRoot, "foo.py"))
	defer script.Close(t)

	// It is uploaded as a regular file under its full name.
	check.remoteDirContent(ctx, "", []string{"foo.py", ".gitkeep", ".gitignore"})
	check.objectType(ctx, "foo.py", "FILE")
	check.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"})

	// Adding the header converts it to a notebook; the remote name loses
	// its extension while the snapshot keeps the local name.
	script.Overwrite(t, "# Databricks notebook source")
	check.objectType(ctx, "foo", "NOTEBOOK")
	check.language(ctx, "foo", "PYTHON")
	check.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"})
	check.snapshotContains([]string{".gitkeep", ".gitignore", "foo.py"})
}
// TestAccIncrementalSyncPythonNotebookDelete verifies that deleting a local
// Python notebook source file removes the corresponding remote notebook.
func TestAccIncrementalSyncPythonNotebookDelete(t *testing.T) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	localRoot, remoteRoot := setupRepo(t, w, ctx)

	// The notebook exists on disk before sync is started.
	notebook := testfile.CreateFile(t, filepath.Join(localRoot, "foo.py"))
	defer notebook.Close(t)
	notebook.Overwrite(t, "# Databricks notebook source")

	// Run `bricks sync` in the background.
	t.Setenv("BRICKS_ROOT", localRoot)
	runner := NewCobraTestRunner(t, "sync", "--remote-path", remoteRoot, "--persist-snapshot=true")
	runner.RunBackground()

	check := assertSync{
		t:          t,
		c:          runner,
		w:          w,
		localRoot:  localRoot,
		remoteRoot: remoteRoot,
	}

	// The notebook is uploaded under its extension-less remote name.
	check.remoteDirContent(ctx, "", []string{"foo", ".gitkeep", ".gitignore"})
	check.objectType(ctx, "foo", "NOTEBOOK")
	check.language(ctx, "foo", "PYTHON")

	// Removing the local file must remove the remote notebook.
	notebook.Remove(t)
	check.remoteDirContent(ctx, "", []string{".gitkeep", ".gitignore"})
}

64
libs/testfile/testfile.go Normal file
View File

@ -0,0 +1,64 @@
package testfile
import (
"os"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestFile is a handle to a file created by CreateFile.
// Use this struct to work with files in a github actions test environment
type TestFile struct {
// mtime is the modification time the helper last recorded for the file:
// the stat result at creation, advanced by one minute on every Overwrite.
mtime time.Time
// fd is the open handle returned by os.Create; writes go through it.
fd *os.File
// path is the location passed to CreateFile; used for truncate, chtimes and remove.
path string
// to make close idempotent
isOpen bool
}
// CreateFile creates the file at path (via os.Create) and returns a TestFile
// handle that keeps the file open and remembers its initial modification time.
//
// The test is stopped immediately if the file cannot be created or stat'ed.
// Carrying on after a non-fatal assertion would call ModTime on a nil
// FileInfo and panic, hiding the real error.
func CreateFile(t *testing.T, path string) *TestFile {
	t.Helper()

	f, err := os.Create(path)
	if !assert.NoError(t, err) {
		t.FailNow()
	}

	fileInfo, err := os.Stat(path)
	if !assert.NoError(t, err) {
		// Best effort: the test is already failing, so a close error adds nothing.
		_ = f.Close()
		t.FailNow()
	}

	return &TestFile{
		path:   path,
		fd:     f,
		mtime:  fileInfo.ModTime(),
		isOpen: true,
	}
}
// Close closes the underlying file handle and reports a close error on t.
// Calling it on an already-closed TestFile does nothing, which lets callers
// defer Close and still call Remove (which closes first).
func (f *TestFile) Close(t *testing.T) {
	if !f.isOpen {
		return
	}

	closeErr := f.fd.Close()
	assert.NoError(t, closeErr)
	f.isOpen = false
}
// Overwrite replaces the file's contents with s through the open handle and
// then moves the file's access and modification times one minute past the
// previously recorded mtime. Each failing step is reported on t.
func (f *TestFile) Overwrite(t *testing.T, s string) {
	// Empty the file, then rewind the handle so the write starts at offset 0.
	truncErr := os.Truncate(f.path, 0)
	assert.NoError(t, truncErr)

	_, seekErr := f.fd.Seek(0, 0)
	assert.NoError(t, seekErr)

	_, writeErr := f.fd.WriteString(s)
	assert.NoError(t, writeErr)

	// We manually update mtime after write because github actions file
	// system does not :')
	bumped := f.mtime.Add(time.Minute)
	chtimesErr := os.Chtimes(f.path, bumped, bumped)
	assert.NoError(t, chtimesErr)

	f.mtime = bumped
}
// Remove closes the handle (if still open) and deletes the file from disk,
// reporting a removal error on t.
func (f *TestFile) Remove(t *testing.T) {
	// Close first so the file is not removed while the handle is still open.
	f.Close(t)

	removeErr := os.Remove(f.path)
	assert.NoError(t, removeErr)
}