databricks-cli/cmd/sync/snapshot_test.go

387 lines
12 KiB
Go

package sync
import (
"context"
"fmt"
"os"
"path/filepath"
"testing"
"time"
"github.com/databricks/bricks/git"
"github.com/databricks/bricks/project"
"github.com/stretchr/testify/assert"
)
type testFile struct {
mtime time.Time
fd *os.File
path string
// to make close idempotent
isOpen bool
}
func createFile(t *testing.T, path string) *testFile {
f, err := os.Create(path)
assert.NoError(t, err)
fileInfo, err := os.Stat(path)
assert.NoError(t, err)
return &testFile{
path: path,
fd: f,
mtime: fileInfo.ModTime(),
isOpen: true,
}
}
func (f *testFile) close(t *testing.T) {
if f.isOpen {
err := f.fd.Close()
assert.NoError(t, err)
f.isOpen = false
}
}
func (f *testFile) overwrite(t *testing.T, s string) {
err := os.Truncate(f.path, 0)
assert.NoError(t, err)
_, err = f.fd.Seek(0, 0)
assert.NoError(t, err)
_, err = f.fd.WriteString(s)
assert.NoError(t, err)
// We manually update mtime after write because github actions file
// system does not :')
err = os.Chtimes(f.path, f.mtime.Add(time.Minute), f.mtime.Add(time.Minute))
assert.NoError(t, err)
f.mtime = f.mtime.Add(time.Minute)
}
func (f *testFile) remove(t *testing.T) {
f.close(t)
err := os.Remove(f.path)
assert.NoError(t, err)
}
func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string) {
keys := make([]string, len(m))
i := 0
for k := range m {
keys[i] = k
i++
}
assert.ElementsMatch(t, expectedKeys, keys)
}
func TestDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet := git.NewFileSet(projectDir)
state := Snapshot{
LastUpdatedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
f1 := createFile(t, filepath.Join(projectDir, "hello.txt"))
defer f1.close(t)
worldFilePath := filepath.Join(projectDir, "world.txt")
f2 := createFile(t, worldFilePath)
defer f2.close(t)
// New files are put
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 2)
assert.Contains(t, change.put, "hello.txt")
assert.Contains(t, change.put, "world.txt")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"hello.txt", "world.txt"})
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames)
// world.txt is editted
f2.overwrite(t, "bunnies are cute.")
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "world.txt")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"hello.txt", "world.txt"})
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames)
// hello.txt is deleted
f1.remove(t)
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
assert.Contains(t, change.delete, "hello.txt")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"world.txt"})
assert.Equal(t, map[string]string{"world.txt": "world.txt"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"world.txt": "world.txt"}, state.RemoteToLocalNames)
}
func TestFolderDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet := git.NewFileSet(projectDir)
state := Snapshot{
LastUpdatedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
err := os.Mkdir(filepath.Join(projectDir, "foo"), os.ModePerm)
assert.NoError(t, err)
f1 := createFile(t, filepath.Join(projectDir, "foo", "bar.py"))
defer f1.close(t)
f1.overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "foo/bar.py")
f1.remove(t)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
assert.Contains(t, change.delete, "foo/bar")
}
func TestPythonNotebookDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet := git.NewFileSet(projectDir)
state := Snapshot{
LastUpdatedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
foo := createFile(t, filepath.Join(projectDir, "foo.py"))
defer foo.close(t)
// Case 1: notebook foo.py is uploaded
files, err := fileSet.All()
assert.NoError(t, err)
foo.overwrite(t, "# Databricks notebook source\nprint(\"abc\")")
change, err := state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "foo.py")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"})
assert.Equal(t, map[string]string{"foo.py": "foo"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames)
// Case 2: notebook foo.py is converted to python script by removing
// magic keyword
foo.overwrite(t, "print(\"abc\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "foo.py")
assert.Contains(t, change.delete, "foo")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"})
assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.RemoteToLocalNames)
// Case 3: Python script foo.py is converted to a databricks notebook
foo.overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 1)
assert.Contains(t, change.put, "foo.py")
assert.Contains(t, change.delete, "foo.py")
assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"})
assert.Equal(t, map[string]string{"foo.py": "foo"}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames)
// Case 4: Python notebook foo.py is deleted, and its remote name is used in change.delete
foo.remove(t)
assert.NoError(t, err)
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 1)
assert.Len(t, change.put, 0)
assert.Contains(t, change.delete, "foo")
assert.Len(t, state.LastUpdatedTimes, 0)
assert.Equal(t, map[string]string{}, state.LocalToRemoteNames)
assert.Equal(t, map[string]string{}, state.RemoteToLocalNames)
}
func TestErrorWhenIdenticalRemoteName(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet := git.NewFileSet(projectDir)
state := Snapshot{
LastUpdatedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
// upload should work since they point to different destinations
pythonFoo := createFile(t, filepath.Join(projectDir, "foo.py"))
defer pythonFoo.close(t)
vanillaFoo := createFile(t, filepath.Join(projectDir, "foo"))
defer vanillaFoo.close(t)
files, err := fileSet.All()
assert.NoError(t, err)
change, err := state.diff(files)
assert.NoError(t, err)
assert.Len(t, change.delete, 0)
assert.Len(t, change.put, 2)
assert.Contains(t, change.put, "foo.py")
assert.Contains(t, change.put, "foo")
// errors out because they point to the same destination
pythonFoo.overwrite(t, "# Databricks notebook source\nprint(\"def\")")
files, err = fileSet.All()
assert.NoError(t, err)
change, err = state.diff(files)
assert.ErrorContains(t, err, "both foo and foo.py point to the same remote file location foo. Please remove one of them from your local project")
}
func TestNewSnapshotDefaults(t *testing.T) {
ctx := setupProject(t)
snapshot, err := newSnapshot(ctx, "/Repos/foo/bar")
prj := project.Get(ctx)
assert.NoError(t, err)
assert.Equal(t, LatestSnapshotVersion, snapshot.Version)
assert.Equal(t, "/Repos/foo/bar", snapshot.RemotePath)
assert.Equal(t, prj.WorkspacesClient().Config.Host, snapshot.Host)
assert.Empty(t, snapshot.LastUpdatedTimes)
assert.Empty(t, snapshot.RemoteToLocalNames)
assert.Empty(t, snapshot.LocalToRemoteNames)
}
func getEmptySnapshot() Snapshot {
return Snapshot{
LastUpdatedTimes: make(map[string]time.Time),
LocalToRemoteNames: make(map[string]string),
RemoteToLocalNames: make(map[string]string),
}
}
func setupProject(t *testing.T) context.Context {
projectDir := t.TempDir()
ctx := context.TODO()
t.Setenv("DATABRICKS_HOST", "www.foobar.com")
ctx, err := project.Initialize(ctx, projectDir, "development")
assert.NoError(t, err)
return ctx
}
func TestOldSnapshotInvalidation(t *testing.T) {
oldVersionSnapshot := `{
"version": "v0",
"host": "www.foobar.com",
"remote_path": "/Repos/foo/bar",
"last_modified_times": {},
"local_to_remote_names": {},
"remote_to_local_names": {}
}`
ctx := setupProject(t)
emptySnapshot := getEmptySnapshot()
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, oldVersionSnapshot)
snapshotFile.close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot
err = snapshot.loadSnapshot(ctx)
assert.NoError(t, err)
// assert snapshot did not get loaded
assert.Equal(t, emptySnapshot, snapshot)
}
func TestNoVersionSnapshotInvalidation(t *testing.T) {
noVersionSnapshot := `{
"host": "www.foobar.com",
"remote_path": "/Repos/foo/bar",
"last_modified_times": {},
"local_to_remote_names": {},
"remote_to_local_names": {}
}`
ctx := setupProject(t)
emptySnapshot := getEmptySnapshot()
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, noVersionSnapshot)
snapshotFile.close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot
err = snapshot.loadSnapshot(ctx)
assert.NoError(t, err)
// assert snapshot did not get loaded
assert.Equal(t, emptySnapshot, snapshot)
}
func TestLatestVersionSnapshotGetsLoaded(t *testing.T) {
latestVersionSnapshot := fmt.Sprintf(`{
"version": "%s",
"host": "www.foobar.com",
"remote_path": "/Repos/foo/bar",
"last_modified_times": {},
"local_to_remote_names": {},
"remote_to_local_names": {}
}`, LatestSnapshotVersion)
ctx := setupProject(t)
emptySnapshot := getEmptySnapshot()
snapshotPath, err := emptySnapshot.getPath(ctx)
assert.NoError(t, err)
snapshotFile := createFile(t, snapshotPath)
snapshotFile.overwrite(t, latestVersionSnapshot)
snapshotFile.close(t)
assert.FileExists(t, snapshotPath)
snapshot := emptySnapshot
err = snapshot.loadSnapshot(ctx)
assert.NoError(t, err)
// assert snapshot gets loaded
assert.NotEqual(t, emptySnapshot, snapshot)
assert.Equal(t, LatestSnapshotVersion, snapshot.Version)
assert.Equal(t, "www.foobar.com", snapshot.Host)
assert.Equal(t, "/Repos/foo/bar", snapshot.RemotePath)
}