From 40ae23bb33cb8babb590f9bf21fd1d357b54d902 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:47:46 +0200 Subject: [PATCH] Refactor change computation for sync (#785) ## Changes This PR pays some tech debt by refactoring sync diff computation into interfaces that are more robust. Specifically: 1. Refactor the single diff computation function into a `SnapshotState` class that computes the target state only based on the current local files making it more robust and not carrying over state from previous iterations. 2. Adds new validations for the sync state which make sure that the invariants that downstream code expects are actually held true. This prevents a class of issues where these invariants break and the synchroniser behaves unexpectedly. Note, this does not change the existing schema for the snapshot, only the way the diff is computed, and thus is backwards compatible (ie does not require a schema version bump). ## Tests --- internal/sync_test.go | 6 +- libs/sync/diff.go | 69 +++++++++ libs/sync/diff_test.go | 113 ++++++++++++++ libs/sync/snapshot.go | 144 +++--------------- libs/sync/snapshot_state.go | 114 ++++++++++++++ libs/sync/snapshot_state_test.go | 116 ++++++++++++++ libs/sync/snapshot_test.go | 66 ++++---- .../testdata/sync-fileset/invalid-nb.ipynb | 0 libs/sync/testdata/sync-fileset/my-nb.py | 2 + libs/sync/testdata/sync-fileset/my-script.py | 1 + .../sync/testdata/sync-fileset/valid-nb.ipynb | 21 +++ 11 files changed, 500 insertions(+), 152 deletions(-) create mode 100644 libs/sync/snapshot_state.go create mode 100644 libs/sync/snapshot_state_test.go create mode 100644 libs/sync/testdata/sync-fileset/invalid-nb.ipynb create mode 100644 libs/sync/testdata/sync-fileset/my-nb.py create mode 100644 libs/sync/testdata/sync-fileset/my-script.py create mode 100644 libs/sync/testdata/sync-fileset/valid-nb.ipynb diff --git a/internal/sync_test.go b/internal/sync_test.go index 9eb1ac1b..f970a7ce 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -222,10 +222,10 @@ func (a *syncTest) snapshotContains(files []string) { assert.Equal(a.t, s.Host, a.w.Config.Host) assert.Equal(a.t, s.RemotePath, a.remoteRoot) for _, filePath := range files { - _, ok := s.LastUpdatedTimes[filePath] - assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastUpdatedTimes)) + _, ok := s.LastModifiedTimes[filePath] + assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastModifiedTimes)) } - assert.Equal(a.t, len(files), len(s.LastUpdatedTimes)) + assert.Equal(a.t, len(files), len(s.LastModifiedTimes)) } func TestAccSyncFullFileSync(t *testing.T) { diff --git a/libs/sync/diff.go b/libs/sync/diff.go index 26e99b34..074bfc56 100644 --- a/libs/sync/diff.go +++ b/libs/sync/diff.go @@ -2,8 +2,12 @@ package sync import ( "path" + "path/filepath" + + "golang.org/x/exp/maps" ) +// List of operations to apply to synchronize local file systems changes to WSFS. type diff struct { delete []string rmdir []string @@ -15,6 +19,71 @@ func (d diff) IsEmpty() bool { return len(d.put) == 0 && len(d.delete) == 0 } +// Compute operations required to make files in WSFS reflect current local files. +// Takes into account changes since the last sync iteration. +func computeDiff(after *SnapshotState, before *SnapshotState) diff { + d := &diff{ + delete: make([]string, 0), + rmdir: make([]string, 0), + mkdir: make([]string, 0), + put: make([]string, 0), + } + d.addRemovedFiles(after, before) + d.addFilesWithRemoteNameChanged(after, before) + d.addNewFiles(after, before) + d.addUpdatedFiles(after, before) + return *d +} + +// Add operators for tracked files that no longer exist. +func (d *diff) addRemovedFiles(after *SnapshotState, before *SnapshotState) { + for localName, remoteName := range before.LocalToRemoteNames { + if _, ok := after.LocalToRemoteNames[localName]; !ok { + d.delete = append(d.delete, remoteName) + } + } + + // Remove directories that would no longer contain any files. + beforeDirs := MakeDirSet(maps.Keys(before.LocalToRemoteNames)) + afterDirs := MakeDirSet(maps.Keys(after.LocalToRemoteNames)) + d.rmdir = beforeDirs.Remove(afterDirs).Slice() +} + +// Cleanup previous remote files for files that had their remote targets change. For +// example this is possible if you convert a normal python script to a notebook. +func (d *diff) addFilesWithRemoteNameChanged(after *SnapshotState, before *SnapshotState) { + for localName, beforeRemoteName := range before.LocalToRemoteNames { + afterRemoteName, ok := after.LocalToRemoteNames[localName] + if ok && afterRemoteName != beforeRemoteName { + d.delete = append(d.delete, beforeRemoteName) + } + } +} + +// Add operators for files that were not being tracked before. +func (d *diff) addNewFiles(after *SnapshotState, before *SnapshotState) { + for localName := range after.LastModifiedTimes { + if _, ok := before.LastModifiedTimes[localName]; !ok { + d.put = append(d.put, filepath.ToSlash(localName)) + } + } + + // Add directories required for these new files. + beforeDirs := MakeDirSet(maps.Keys(before.LocalToRemoteNames)) + afterDirs := MakeDirSet(maps.Keys(after.LocalToRemoteNames)) + d.mkdir = afterDirs.Remove(beforeDirs).Slice() +} + +// Add operators for files which had their contents updated. +func (d *diff) addUpdatedFiles(after *SnapshotState, before *SnapshotState) { + for localName, modTime := range after.LastModifiedTimes { + prevModTime, ok := before.LastModifiedTimes[localName] + if ok && modTime.After(prevModTime) { + d.put = append(d.put, filepath.ToSlash(localName)) + } + } +} + // groupedMkdir returns a slice of slices of paths to create. // Because the underlying mkdir calls create intermediate directories, // we can group them together to reduce the total number of calls. diff --git a/libs/sync/diff_test.go b/libs/sync/diff_test.go index ff448872..94b6cc37 100644 --- a/libs/sync/diff_test.go +++ b/libs/sync/diff_test.go @@ -2,6 +2,7 @@ package sync import ( "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -71,3 +72,115 @@ func TestDiffGroupedRmdirWithLeafsOnly(t *testing.T) { assert.Len(t, out, 1) assert.ElementsMatch(t, d.rmdir, out[0]) } + +func TestDiffComputationForRemovedFiles(t *testing.T) { + before := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c.py": "foo/a/b/c", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c.py", + }, + } + after := &SnapshotState{} + + expected := diff{ + delete: []string{"foo/a/b/c"}, + rmdir: []string{"foo", "foo/a", "foo/a/b"}, + mkdir: []string{}, + put: []string{}, + } + assert.Equal(t, expected, computeDiff(after, before)) +} + +func TestDiffComputationWhenRemoteNameIsChanged(t *testing.T) { + tick := time.Now() + before := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c.py": "foo/a/b/c", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c.py", + }, + LastModifiedTimes: map[string]time.Time{ + "foo/a/b/c.py": tick, + }, + } + tick = tick.Add(time.Second) + after := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c.py": "foo/a/b/c.py", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c.py": "foo/a/b/c.py", + }, + LastModifiedTimes: map[string]time.Time{ + "foo/a/b/c.py": tick, + }, + } + + expected := diff{ + delete: []string{"foo/a/b/c"}, + rmdir: []string{}, + mkdir: []string{}, + put: []string{"foo/a/b/c.py"}, + } + assert.Equal(t, expected, computeDiff(after, before)) +} + +func TestDiffComputationForNewFiles(t *testing.T) { + after := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c.py": "foo/a/b/c", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c.py", + }, + LastModifiedTimes: map[string]time.Time{ + "foo/a/b/c.py": time.Now(), + }, + } + + expected := diff{ + delete: []string{}, + rmdir: []string{}, + mkdir: []string{"foo", "foo/a", "foo/a/b"}, + put: []string{"foo/a/b/c.py"}, + } + assert.Equal(t, expected, computeDiff(after, &SnapshotState{})) +} + +func TestDiffComputationForUpdatedFiles(t *testing.T) { + tick := time.Now() + before := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c", + }, + LastModifiedTimes: map[string]time.Time{ + "foo/a/b/c": tick, + }, + } + tick = tick.Add(time.Second) + after := &SnapshotState{ + LocalToRemoteNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c", + }, + RemoteToLocalNames: map[string]string{ + "foo/a/b/c": "foo/a/b/c", + }, + LastModifiedTimes: map[string]time.Time{ + "foo/a/b/c": tick, + }, + } + + expected := diff{ + delete: []string{}, + rmdir: []string{}, + mkdir: []string{}, + put: []string{"foo/a/b/c"}, + } + assert.Equal(t, expected, computeDiff(after, before)) +} diff --git a/libs/sync/snapshot.go b/libs/sync/snapshot.go index 1680f046..7e2130e9 100644 --- a/libs/sync/snapshot.go +++ b/libs/sync/snapshot.go @@ -6,7 +6,6 @@ import ( "fmt" "os" "path/filepath" - "strings" "time" "crypto/md5" @@ -14,8 +13,6 @@ import ( "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/notebook" - "golang.org/x/exp/maps" ) // Bump it up every time a potentially breaking change is made to the snapshot schema @@ -51,19 +48,7 @@ type Snapshot struct { // Path in workspace for project repo RemotePath string `json:"remote_path"` - // Map of all files present in the remote repo with the: - // key: relative file path from project root - // value: last time the remote instance of this file was updated - LastUpdatedTimes map[string]time.Time `json:"last_modified_times"` - - // This map maps local file names to their remote names - // eg. notebook named "foo.py" locally would be stored as "foo", thus this - // map will contain an entry "foo.py" -> "foo" - LocalToRemoteNames map[string]string `json:"local_to_remote_names"` - - // Inverse of localToRemoteNames. Together the form a bijective mapping (ie - // there is a 1:1 unique mapping between local and remote name) - RemoteToLocalNames map[string]string `json:"remote_to_local_names"` + *SnapshotState } const syncSnapshotDirName = "sync-snapshots" @@ -99,12 +84,14 @@ func newSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error) { SnapshotPath: path, New: true, - Version: LatestSnapshotVersion, - Host: opts.Host, - RemotePath: opts.RemotePath, - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + Version: LatestSnapshotVersion, + Host: opts.Host, + RemotePath: opts.RemotePath, + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, }, nil } @@ -173,109 +160,22 @@ func loadOrNewSnapshot(ctx context.Context, opts *SyncOptions) (*Snapshot, error return snapshot, nil } -func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (change diff, err error) { - lastModifiedTimes := s.LastUpdatedTimes - remoteToLocalNames := s.RemoteToLocalNames - localToRemoteNames := s.LocalToRemoteNames - - // set of files currently present in the local file system and tracked by git - localFileSet := map[string]struct{}{} - for _, f := range all { - localFileSet[f.Relative] = struct{}{} +func (s *Snapshot) diff(ctx context.Context, all []fileset.File) (diff, error) { + targetState, err := NewSnapshotState(all) + if err != nil { + return diff{}, fmt.Errorf("error while computing new sync state: %w", err) } - // Capture both previous and current set of files. - previousFiles := maps.Keys(lastModifiedTimes) - currentFiles := maps.Keys(localFileSet) - - // Build directory sets to figure out which directories to create and which to remove. - previousDirectories := MakeDirSet(previousFiles) - currentDirectories := MakeDirSet(currentFiles) - - // Create new directories; remove stale directories. - change.mkdir = currentDirectories.Remove(previousDirectories).Slice() - change.rmdir = previousDirectories.Remove(currentDirectories).Slice() - - for _, f := range all { - // get current modified timestamp - modified := f.Modified() - lastSeenModified, seen := lastModifiedTimes[f.Relative] - - if !seen || modified.After(lastSeenModified) { - lastModifiedTimes[f.Relative] = modified - - // get file metadata about whether it's a notebook - isNotebook, _, err := notebook.Detect(f.Absolute) - if err != nil { - // Ignore this file if we're unable to determine the notebook type. - // Trying to upload such a file to the workspace would fail anyway. - log.Warnf(ctx, err.Error()) - continue - } - - // change separators to '/' for file paths in remote store - unixFileName := filepath.ToSlash(f.Relative) - - // put file in databricks workspace - change.put = append(change.put, unixFileName) - - // Strip extension for notebooks. - remoteName := unixFileName - if isNotebook { - ext := filepath.Ext(remoteName) - remoteName = strings.TrimSuffix(remoteName, ext) - } - - // If the remote handle of a file changes, we want to delete the old - // remote version of that file to avoid duplicates. - // This can happen if a python notebook is converted to a python - // script or vice versa - oldRemoteName, ok := localToRemoteNames[f.Relative] - if ok && oldRemoteName != remoteName { - change.delete = append(change.delete, oldRemoteName) - delete(remoteToLocalNames, oldRemoteName) - } - - // We cannot allow two local files in the project to point to the same - // remote path - prevLocalName, ok := remoteToLocalNames[remoteName] - _, prevLocalFileExists := localFileSet[prevLocalName] - if ok && prevLocalName != f.Relative && prevLocalFileExists { - return change, fmt.Errorf("both %s and %s point to the same remote file location %s. Please remove one of them from your local project", prevLocalName, f.Relative, remoteName) - } - localToRemoteNames[f.Relative] = remoteName - remoteToLocalNames[remoteName] = f.Relative - } - } - // figure out files in the snapshot.lastModifiedTimes, but not on local - // filesystem. These will be deleted - for localName := range lastModifiedTimes { - _, exists := localFileSet[localName] - if exists { - continue - } - - // TODO: https://databricks.atlassian.net/browse/DECO-429 - // Add error wrapper giving instructions like this for all errors here :) - remoteName, ok := localToRemoteNames[localName] - if !ok { - return change, fmt.Errorf("missing remote path for local path: %s. Please try syncing again after deleting .databricks/sync-snapshots dir from your project root", localName) - } - - // add them to a delete batch - change.delete = append(change.delete, remoteName) + currentState := s.SnapshotState + if err := currentState.validate(); err != nil { + return diff{}, fmt.Errorf("error parsing existing sync state: %w", err) } - // and remove them from the snapshot - for _, remoteName := range change.delete { - // we do note assert that remoteName exists in remoteToLocalNames since it - // will be missing for files with remote name changed - localName := remoteToLocalNames[remoteName] + // Compute diff to apply to get from current state to new target state. + diff := computeDiff(targetState, currentState) - delete(lastModifiedTimes, localName) - delete(remoteToLocalNames, remoteName) - delete(localToRemoteNames, localName) - } - - return + // Update state to new value. This is not persisted to the file system before + // the diff is applied successfully. + s.SnapshotState = targetState + return diff, nil } diff --git a/libs/sync/snapshot_state.go b/libs/sync/snapshot_state.go new file mode 100644 index 00000000..57506352 --- /dev/null +++ b/libs/sync/snapshot_state.go @@ -0,0 +1,114 @@ +package sync + +import ( + "fmt" + "path/filepath" + "strings" + "time" + + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/notebook" +) + +// SnapshotState keeps track of files on the local filesystem and their corresponding +// entries in WSFS. +type SnapshotState struct { + // Map of local file names to their last recorded modified time. Files found + // to have a newer mtime have their content synced to their remote version. + LastModifiedTimes map[string]time.Time `json:"last_modified_times"` + + // Map of local file names to corresponding remote names. + // For example: A notebook named "foo.py" locally would be stored as "foo" + // in WSFS, and the entry would be: {"foo.py": "foo"} + LocalToRemoteNames map[string]string `json:"local_to_remote_names"` + + // Inverse of LocalToRemoteNames. Together they form a 1:1 mapping where all + // the remote names and local names are unique. + RemoteToLocalNames map[string]string `json:"remote_to_local_names"` +} + +// Convert an array of files on the local file system to a SnapshotState representation. +func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) { + fs := &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + } + + // Expect no files to have a duplicate entry in the input array. + seen := make(map[string]fileset.File) + for _, f := range localFiles { + if _, ok := seen[f.Relative]; !ok { + seen[f.Relative] = f + } else { + return nil, fmt.Errorf("expected only one entry per file. Found duplicate entries for file: %s", f.Relative) + } + } + + // Compute the new state. + for _, f := range localFiles { + // Compute the remote name the file will have in WSFS + remoteName := filepath.ToSlash(f.Relative) + isNotebook, _, err := notebook.Detect(f.Absolute) + if err != nil { + // Ignore this file if we're unable to determine the notebook type. + // Trying to upload such a file to the workspace would fail anyway. + continue + } + if isNotebook { + ext := filepath.Ext(remoteName) + remoteName = strings.TrimSuffix(remoteName, ext) + } + + // Add the file to snapshot state + fs.LastModifiedTimes[f.Relative] = f.Modified() + if existingLocalName, ok := fs.RemoteToLocalNames[remoteName]; ok { + return nil, fmt.Errorf("both %s and %s point to the same remote file location %s. Please remove one of them from your local project", existingLocalName, f.Relative, remoteName) + } + + fs.LocalToRemoteNames[f.Relative] = remoteName + fs.RemoteToLocalNames[remoteName] = f.Relative + } + return fs, nil +} + +// Consistency checks for the sync files state representation. These are invariants +// that downstream code for computing changes to apply to WSFS depends on. +// +// Invariants: +// 1. All entries in LastModifiedTimes have a corresponding entry in LocalToRemoteNames +// and vice versa. +// 2. LocalToRemoteNames and RemoteToLocalNames together form a 1:1 mapping of +// local <-> remote file names. +func (fs *SnapshotState) validate() error { + // Validate invariant (1) + for localName := range fs.LastModifiedTimes { + if _, ok := fs.LocalToRemoteNames[localName]; !ok { + return fmt.Errorf("invalid sync state representation. Local file %s is missing the corresponding remote file", localName) + } + } + for localName := range fs.LocalToRemoteNames { + if _, ok := fs.LastModifiedTimes[localName]; !ok { + return fmt.Errorf("invalid sync state representation. Local file %s is missing it's last modified time", localName) + } + } + + // Validate invariant (2) + for localName, remoteName := range fs.LocalToRemoteNames { + if _, ok := fs.RemoteToLocalNames[remoteName]; !ok { + return fmt.Errorf("invalid sync state representation. Remote file %s is missing the corresponding local file", remoteName) + } + if fs.RemoteToLocalNames[remoteName] != localName { + return fmt.Errorf("invalid sync state representation. Inconsistent values found. Local file %s points to %s. Remote file %s points to %s", localName, remoteName, remoteName, fs.RemoteToLocalNames[remoteName]) + } + } + for remoteName, localName := range fs.RemoteToLocalNames { + if _, ok := fs.LocalToRemoteNames[localName]; !ok { + return fmt.Errorf("invalid sync state representation. local file %s is missing the corresponding remote file", localName) + } + if fs.LocalToRemoteNames[localName] != remoteName { + return fmt.Errorf("invalid sync state representation. Inconsistent values found. Remote file %s points to %s. Local file %s points to %s", remoteName, localName, localName, fs.LocalToRemoteNames[localName]) + } + } + return nil +} diff --git a/libs/sync/snapshot_state_test.go b/libs/sync/snapshot_state_test.go new file mode 100644 index 00000000..bfcdbef6 --- /dev/null +++ b/libs/sync/snapshot_state_test.go @@ -0,0 +1,116 @@ +package sync + +import ( + "testing" + "time" + + "github.com/databricks/cli/libs/fileset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSnapshotState(t *testing.T) { + fileSet := fileset.New("./testdata/sync-fileset") + files, err := fileSet.All() + require.NoError(t, err) + + // Assert initial contents of the fileset + assert.Len(t, files, 4) + assert.Equal(t, "invalid-nb.ipynb", files[0].Name()) + assert.Equal(t, "my-nb.py", files[1].Name()) + assert.Equal(t, "my-script.py", files[2].Name()) + assert.Equal(t, "valid-nb.ipynb", files[3].Name()) + + // Assert snapshot state generated from the fileset. Note that the invalid notebook + // has been ignored. + s, err := NewSnapshotState(files) + require.NoError(t, err) + assertKeysOfMap(t, s.LastModifiedTimes, []string{"valid-nb.ipynb", "my-nb.py", "my-script.py"}) + assertKeysOfMap(t, s.LocalToRemoteNames, []string{"valid-nb.ipynb", "my-nb.py", "my-script.py"}) + assertKeysOfMap(t, s.RemoteToLocalNames, []string{"valid-nb", "my-nb", "my-script.py"}) + assert.NoError(t, s.validate()) +} + +func TestSnapshotStateValidationErrors(t *testing.T) { + s := &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "a": time.Now(), + }, + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Local file a is missing the corresponding remote file") + + s = &SnapshotState{ + LastModifiedTimes: map[string]time.Time{}, + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: map[string]string{ + "a": "b", + }, + } + assert.EqualError(t, s.validate(), "invalid sync state representation. local file b is missing the corresponding remote file") + + s = &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "a": time.Now(), + }, + LocalToRemoteNames: map[string]string{ + "a": "b", + }, + RemoteToLocalNames: make(map[string]string), + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Remote file b is missing the corresponding local file") + + s = &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: map[string]string{ + "a": "b", + }, + RemoteToLocalNames: map[string]string{ + "b": "a", + }, + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Local file a is missing it's last modified time") + + s = &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "a": time.Now(), + }, + LocalToRemoteNames: map[string]string{ + "a": "b", + }, + RemoteToLocalNames: map[string]string{ + "b": "b", + }, + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Inconsistent values found. Local file a points to b. Remote file b points to b") + + s = &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "a": time.Now(), + "c": time.Now(), + }, + LocalToRemoteNames: map[string]string{ + "a": "b", + "c": "b", + }, + RemoteToLocalNames: map[string]string{ + "b": "a", + }, + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Inconsistent values found. Local file c points to b. Remote file b points to a") + + s = &SnapshotState{ + LastModifiedTimes: map[string]time.Time{ + "a": time.Now(), + }, + LocalToRemoteNames: map[string]string{ + "a": "b", + }, + RemoteToLocalNames: map[string]string{ + "b": "a", + "c": "a", + }, + } + assert.EqualError(t, s.validate(), "invalid sync state representation. Inconsistent values found. Remote file c points to a. Local file a points to b") +} diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index c2e8f6b8..d6358d4a 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -14,7 +14,7 @@ import ( "github.com/stretchr/testify/require" ) -func assertKeysOfMap(t *testing.T, m map[string]time.Time, expectedKeys []string) { +func assertKeysOfMap[T any](t *testing.T, m map[string]T, expectedKeys []string) { keys := make([]string, len(m)) i := 0 for k := range m { @@ -32,9 +32,11 @@ func TestDiff(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } f1 := testfile.CreateFile(t, filepath.Join(projectDir, "hello.txt")) @@ -52,7 +54,7 @@ func TestDiff(t *testing.T) { assert.Len(t, change.put, 2) assert.Contains(t, change.put, "hello.txt") assert.Contains(t, change.put, "world.txt") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"hello.txt", "world.txt"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"hello.txt", "world.txt"}) assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames) @@ -67,7 +69,7 @@ func TestDiff(t *testing.T) { assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) assert.Contains(t, change.put, "world.txt") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"hello.txt", "world.txt"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"hello.txt", "world.txt"}) assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"hello.txt": "hello.txt", "world.txt": "world.txt"}, state.RemoteToLocalNames) @@ -81,7 +83,7 @@ func TestDiff(t *testing.T) { assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) assert.Contains(t, change.delete, "hello.txt") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"world.txt"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"world.txt"}) assert.Equal(t, map[string]string{"world.txt": "world.txt"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"world.txt": "world.txt"}, state.RemoteToLocalNames) } @@ -94,9 +96,11 @@ func TestSymlinkDiff(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } err = os.Mkdir(filepath.Join(projectDir, "foo"), os.ModePerm) @@ -123,9 +127,11 @@ func TestFolderDiff(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } err = os.Mkdir(filepath.Join(projectDir, "foo"), os.ModePerm) @@ -166,9 +172,11 @@ func TestPythonNotebookDiff(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } foo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py")) @@ -183,7 +191,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Len(t, change.delete, 0) assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo.py") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"foo.py"}) assert.Equal(t, map[string]string{"foo.py": "foo"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames) @@ -198,7 +206,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo.py") assert.Contains(t, change.delete, "foo") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"foo.py"}) assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"foo.py": "foo.py"}, state.RemoteToLocalNames) @@ -212,7 +220,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Len(t, change.put, 1) assert.Contains(t, change.put, "foo.py") assert.Contains(t, change.delete, "foo.py") - assertKeysOfMap(t, state.LastUpdatedTimes, []string{"foo.py"}) + assertKeysOfMap(t, state.LastModifiedTimes, []string{"foo.py"}) assert.Equal(t, map[string]string{"foo.py": "foo"}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{"foo": "foo.py"}, state.RemoteToLocalNames) @@ -226,7 +234,7 @@ func TestPythonNotebookDiff(t *testing.T) { assert.Len(t, change.delete, 1) assert.Len(t, change.put, 0) assert.Contains(t, change.delete, "foo") - assert.Len(t, state.LastUpdatedTimes, 0) + assert.Len(t, state.LastModifiedTimes, 0) assert.Equal(t, map[string]string{}, state.LocalToRemoteNames) assert.Equal(t, map[string]string{}, state.RemoteToLocalNames) } @@ -239,9 +247,11 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } // upload should work since they point to different destinations @@ -274,9 +284,11 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { fileSet, err := git.NewFileSet(projectDir) require.NoError(t, err) state := Snapshot{ - LastUpdatedTimes: make(map[string]time.Time), - LocalToRemoteNames: make(map[string]string), - RemoteToLocalNames: make(map[string]string), + SnapshotState: &SnapshotState{ + LastModifiedTimes: make(map[string]time.Time), + LocalToRemoteNames: make(map[string]string), + RemoteToLocalNames: make(map[string]string), + }, } // upload should work since they point to different destinations @@ -321,7 +333,7 @@ func TestNewSnapshotDefaults(t *testing.T) { assert.Equal(t, LatestSnapshotVersion, snapshot.Version) assert.Equal(t, opts.RemotePath, snapshot.RemotePath) assert.Equal(t, opts.Host, snapshot.Host) - assert.Empty(t, snapshot.LastUpdatedTimes) + assert.Empty(t, snapshot.LastModifiedTimes) assert.Empty(t, snapshot.RemoteToLocalNames) assert.Empty(t, snapshot.LocalToRemoteNames) } diff --git a/libs/sync/testdata/sync-fileset/invalid-nb.ipynb b/libs/sync/testdata/sync-fileset/invalid-nb.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/libs/sync/testdata/sync-fileset/my-nb.py b/libs/sync/testdata/sync-fileset/my-nb.py new file mode 100644 index 00000000..7bdf929b --- /dev/null +++ b/libs/sync/testdata/sync-fileset/my-nb.py @@ -0,0 +1,2 @@ +# Databricks notebook source +print("2") diff --git a/libs/sync/testdata/sync-fileset/my-script.py b/libs/sync/testdata/sync-fileset/my-script.py new file mode 100644 index 00000000..d2297561 --- /dev/null +++ b/libs/sync/testdata/sync-fileset/my-script.py @@ -0,0 +1 @@ +print("1") diff --git a/libs/sync/testdata/sync-fileset/valid-nb.ipynb b/libs/sync/testdata/sync-fileset/valid-nb.ipynb new file mode 100644 index 00000000..f36e3589 --- /dev/null +++ b/libs/sync/testdata/sync-fileset/valid-nb.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"3\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}