From e3e9bc6def4e55603a316d0f155664d6bacdb11d Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Fri, 18 Aug 2023 10:07:25 +0200
Subject: [PATCH] Added support for sync.include and sync.exclude sections (#671)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Changes
Added support for `sync.include` and `sync.exclude` sections
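
For illustration, `sync.include` and `sync.exclude` can be combined in the same `databricks.yml`; the bundle name and glob patterns below are placeholders only, and all globs are evaluated relative to the bundle root:

```
bundle:
  name: example-bundle

sync:
  include:
    # re-include build artifacts that the project's gitignore normally filters out
    - "dist/*.whl"
  exclude:
    # never sync local scratch files, even if they would otherwise be picked up
    - "scratch/*.py"
```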
## Tests
Added `sample-java` folder to gitignore

```
bundle:
  name: wheel-task
sync:
  include:
    - "./sample-java/*.kts"
```

Kotlin files were correctly synced.

```
[DEBUG] Test execution command: /opt/homebrew/opt/go@1.21/bin/go test ./... -json -timeout 1h -coverpkg=./... -coverprofile=coverage.txt -run ^TestAcc
[DEBUG] Test execution directory: /Users/andrew.nester/cli
2023/08/17 17:12:10 [INFO] ✅ TestAccAlertsCreateErrWhenNoArguments (2.320s)
2023/08/17 17:12:10 [INFO] ✅ TestAccApiGet (0.650s)
2023/08/17 17:12:12 [INFO] ✅ TestAccClustersList (1.060s)
2023/08/17 17:12:12 [INFO] ✅ TestAccClustersGet (0.760s)
2023/08/17 17:12:26 [INFO] ✅ TestAccFilerWorkspaceFilesReadWrite (13.270s)
2023/08/17 17:12:32 [INFO] ✅ TestAccFilerWorkspaceFilesReadDir (6.860s)
2023/08/17 17:12:46 [INFO] ✅ TestAccFilerDbfsReadWrite (13.380s)
2023/08/17 17:12:53 [INFO] ✅ TestAccFilerDbfsReadDir (7.460s)
2023/08/17 17:13:01 [INFO] ✅ TestAccFilerWorkspaceNotebookConflict (7.920s)
2023/08/17 17:13:10 [INFO] ✅ TestAccFilerWorkspaceNotebookWithOverwriteFlag (9.290s)
2023/08/17 17:13:10 [INFO] ✅ TestAccFilerLocalReadWrite (0.010s)
2023/08/17 17:13:11 [INFO] ✅ TestAccFilerLocalReadDir (0.010s)
2023/08/17 17:13:14 [INFO] ✅ TestAccFsCatForDbfs (3.180s)
2023/08/17 17:13:15 [INFO] ✅ TestAccFsCatForDbfsOnNonExistentFile (0.940s)
2023/08/17 17:13:15 [INFO] ✅ TestAccFsCatForDbfsInvalidScheme (0.560s)
2023/08/17 17:13:18 [INFO] ✅ TestAccFsCatDoesNotSupportOutputModeJson (2.910s)
2023/08/17 17:13:51 [INFO] ✅ TestAccFsCpDir (32.730s)
2023/08/17 17:14:06 [INFO] ✅ TestAccFsCpFileToFile (14.740s)
2023/08/17 17:14:20 [INFO] ✅ TestAccFsCpFileToDir (14.340s)
2023/08/17 17:14:53 [INFO] ✅ TestAccFsCpDirToDirFileNotOverwritten (32.710s)
2023/08/17 17:15:12 [INFO] ✅ TestAccFsCpFileToDirFileNotOverwritten (19.590s)
2023/08/17 17:15:32 [INFO] ✅ TestAccFsCpFileToFileFileNotOverwritten (19.950s)
2023/08/17 17:16:11 [INFO] ✅ TestAccFsCpDirToDirWithOverwriteFlag (38.970s)
2023/08/17 17:16:32 [INFO] ✅ TestAccFsCpFileToFileWithOverwriteFlag (21.040s)
2023/08/17 17:16:52 [INFO] ✅ TestAccFsCpFileToDirWithOverwriteFlag (19.670s)
2023/08/17 17:16:54 [INFO] ✅ TestAccFsCpErrorsWhenSourceIsDirWithoutRecursiveFlag (1.890s)
2023/08/17 17:16:54 [INFO] ✅ TestAccFsCpErrorsOnInvalidScheme (0.690s)
2023/08/17 17:17:10 [INFO] ✅ TestAccFsCpSourceIsDirectoryButTargetIsFile (15.810s)
2023/08/17 17:17:14 [INFO] ✅ TestAccFsLsForDbfs (4.000s)
2023/08/17 17:17:18 [INFO] ✅ TestAccFsLsForDbfsWithAbsolutePaths (4.000s)
2023/08/17 17:17:21 [INFO] ✅ TestAccFsLsForDbfsOnFile (3.140s)
2023/08/17 17:17:23 [INFO] ✅ TestAccFsLsForDbfsOnEmptyDir (2.030s)
2023/08/17 17:17:24 [INFO] ✅ TestAccFsLsForDbfsForNonexistingDir (0.840s)
2023/08/17 17:17:25 [INFO] ✅ TestAccFsLsWithoutScheme (0.590s)
2023/08/17 17:17:27 [INFO] ✅ TestAccFsMkdirCreatesDirectory (2.310s)
2023/08/17 17:17:30 [INFO] ✅ TestAccFsMkdirCreatesMultipleDirectories (2.800s)
2023/08/17 17:17:33 [INFO] ✅ TestAccFsMkdirWhenDirectoryAlreadyExists (2.700s)
2023/08/17 17:17:35 [INFO] ✅ TestAccFsMkdirWhenFileExistsAtPath (2.870s)
2023/08/17 17:17:40 [INFO] ✅ TestAccFsRmForFile (4.030s)
2023/08/17 17:17:43 [INFO] ✅ TestAccFsRmForEmptyDirectory (3.470s)
2023/08/17 17:17:46 [INFO] ✅ TestAccFsRmForNonEmptyDirectory (3.350s)
2023/08/17 17:17:47 [INFO] ✅ TestAccFsRmForNonExistentFile (0.940s)
2023/08/17 17:17:51 [INFO] ✅ TestAccFsRmForNonEmptyDirectoryWithRecursiveFlag (3.570s)
2023/08/17 17:17:52 [INFO] ✅ TestAccGitClone (0.890s)
2023/08/17 17:17:52 [INFO] ✅ TestAccGitCloneWithOnlyRepoNameOnAlternateBranch (0.730s)
2023/08/17 17:17:53 [INFO] ✅ TestAccGitCloneErrorsWhenRepositoryDoesNotExist (0.540s)
2023/08/17 17:18:02 [INFO] ✅ TestAccLock (8.800s)
2023/08/17 17:18:06 [INFO] ✅ TestAccLockUnlockWithoutAllowsLockFileNotExist (3.930s)
2023/08/17 17:18:09 [INFO] ✅ TestAccLockUnlockWithAllowsLockFileNotExist (3.320s)
2023/08/17 17:18:20 [INFO] ✅ TestAccSyncFullFileSync (10.570s)
2023/08/17 17:18:31 [INFO] ✅ TestAccSyncIncrementalFileSync (11.460s)
2023/08/17 17:18:42 [INFO] ✅ TestAccSyncNestedFolderSync (10.850s)
2023/08/17 17:18:53 [INFO] ✅ TestAccSyncNestedFolderDoesntFailOnNonEmptyDirectory (10.650s)
2023/08/17 17:19:04 [INFO] ✅ TestAccSyncNestedSpacePlusAndHashAreEscapedSync (10.930s)
2023/08/17 17:19:11 [INFO] ✅ TestAccSyncIncrementalFileOverwritesFolder (7.010s)
2023/08/17 17:19:18 [INFO] ✅ TestAccSyncIncrementalSyncPythonNotebookToFile (7.380s)
2023/08/17 17:19:24 [INFO] ✅ TestAccSyncIncrementalSyncFileToPythonNotebook (6.220s)
2023/08/17 17:19:30 [INFO] ✅ TestAccSyncIncrementalSyncPythonNotebookDelete (5.530s)
2023/08/17 17:19:32 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist (2.620s)
2023/08/17 17:19:38 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableIfRepoExists (5.460s)
2023/08/17 17:19:40 [INFO] ✅ TestAccSyncEnsureRemotePathIsUsableInWorkspace (1.850s)
2023/08/17 17:19:40 [INFO] ✅ TestAccWorkspaceList (0.780s)
2023/08/17 17:19:51 [INFO] ✅ TestAccExportDir (10.350s)
2023/08/17 17:19:54 [INFO] ✅ TestAccExportDirDoesNotOverwrite (3.330s)
2023/08/17 17:19:58 [INFO] ✅ TestAccExportDirWithOverwriteFlag (3.770s)
2023/08/17 17:20:07 [INFO] ✅ TestAccImportDir (9.320s)
2023/08/17 17:20:24 [INFO] ✅ TestAccImportDirDoesNotOverwrite (16.950s)
2023/08/17 17:20:35 [INFO] ✅ TestAccImportDirWithOverwriteFlag (10.620s)
2023/08/17 17:20:35 [INFO] ✅ 68/68 passed, 0 failed, 3 skipped
```
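
As background for reviewers: the effective file list is now the union of the git-tracked files and the `include` matches, minus the `exclude` matches (see `getFileList` in `libs/sync/sync.go`). Below is a minimal, self-contained sketch of how the new `fileset.GlobSet` and `libs/set` helpers from this change compose; the root `"."`, the patterns, and the standalone `main` wrapper are placeholders for illustration, and the git file set is omitted for brevity:

```
package main

import (
	"fmt"

	"github.com/databricks/cli/libs/fileset"
	"github.com/databricks/cli/libs/set"
)

func main() {
	// Placeholder root and patterns; globs are resolved relative to the root.
	includes, err := fileset.NewGlobSet(".", []string{"dist/*.whl"})
	if err != nil {
		panic(err)
	}
	excludes, err := fileset.NewGlobSet(".", []string{"scratch/*.py"})
	if err != nil {
		panic(err)
	}

	// Deduplicate by absolute path, as getFileList does.
	all := set.NewSetF(func(f fileset.File) string {
		return f.Absolute
	})

	// In the real code the git-tracked file set is added first; here we only
	// add the include matches and then drop the exclude matches.
	included, err := includes.All()
	if err != nil {
		panic(err)
	}
	all.Add(included...)

	excluded, err := excludes.All()
	if err != nil {
		panic(err)
	}
	for _, f := range excluded {
		all.Remove(f)
	}

	for _, f := range all.Iter() {
		fmt.Println(f.Absolute)
	}
}
```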
---
 bundle/bundle.go            |  34 +++++++++++
 bundle/config/root.go       |   3 +
 bundle/config/sync.go       |  13 ++++
 bundle/deploy/files/sync.go |  12 +++-
 cmd/bundle/sync.go          |   7 +++
 cmd/sync/sync.go            |   7 +++
 libs/fileset/glob.go        |  49 +++++++++++++++
 libs/fileset/glob_test.go   |  65 ++++++++++++++++++++
 libs/set/set.go             |  75 +++++++++++++++++++++++
 libs/set/set_test.go        | 111 +++++++++++++++++++++++++++++++++
 libs/sync/sync.go           |  72 ++++++++++++++++++----
 libs/sync/sync_test.go      | 119 ++++++++++++++++++++++++++++++++++++
 12 files changed, 554 insertions(+), 13 deletions(-)
 create mode 100644 bundle/config/sync.go
 create mode 100644 libs/fileset/glob.go
 create mode 100644 libs/fileset/glob_test.go
 create mode 100644 libs/set/set.go
 create mode 100644 libs/set/set_test.go
 create mode 100644 libs/sync/sync_test.go

diff --git a/bundle/bundle.go b/bundle/bundle.go
index a5eaa289..d69d5815 100644
--- a/bundle/bundle.go
+++ b/bundle/bundle.go
@@ -24,6 +24,8 @@ import (
 	"github.com/hashicorp/terraform-exec/tfexec"
 )
 
+const internalFolder = ".internal"
+
 type Bundle struct {
 	Config config.Root
 
@@ -155,6 +157,38 @@ func (b *Bundle) CacheDir(paths ...string) (string, error) {
 	return dir, nil
 }
 
+// This directory is used to store and automatically sync internal bundle files,
+// such as notebook trampoline files for Python wheels.
+func (b *Bundle) InternalDir() (string, error) {
+	cacheDir, err := b.CacheDir()
+	if err != nil {
+		return "", err
+	}
+
+	dir := filepath.Join(cacheDir, internalFolder)
+	err = os.MkdirAll(dir, 0700)
+	if err != nil {
+		return dir, err
+	}
+
+	return dir, nil
+}
+
+// GetSyncIncludePatterns returns the list of user-defined includes
+// and also adds the InternalDir folder to the include list for the sync command,
+// so that this folder is always synced.
+func (b *Bundle) GetSyncIncludePatterns() ([]string, error) {
+	internalDir, err := b.InternalDir()
+	if err != nil {
+		return nil, err
+	}
+	internalDirRel, err := filepath.Rel(b.Config.Path, internalDir)
+	if err != nil {
+		return nil, err
+	}
+	return append(b.Config.Sync.Include, filepath.ToSlash(filepath.Join(internalDirRel, "*.*"))), nil
+}
+
 func (b *Bundle) GitRepository() (*git.Repository, error) {
 	rootPath, err := folders.FindDirWithLeaf(b.Config.Path, ".git")
 	if err != nil {
diff --git a/bundle/config/root.go b/bundle/config/root.go
index 24426dd8..e0d20425 100644
--- a/bundle/config/root.go
+++ b/bundle/config/root.go
@@ -77,6 +77,9 @@ type Root struct {
 
 	// DEPRECATED. Left for backward compatibility with Targets
 	Environments map[string]*Target `json:"environments,omitempty"`
+
+	// Sync section specifies options for file synchronization
+	Sync Sync `json:"sync"`
 }
 
 func Load(path string) (*Root, error) {
diff --git a/bundle/config/sync.go b/bundle/config/sync.go
new file mode 100644
index 00000000..0580e4c4
--- /dev/null
+++ b/bundle/config/sync.go
@@ -0,0 +1,13 @@
+package config
+
+type Sync struct {
+	// Include contains a list of globs evaluated relative to the bundle root path
+	// to explicitly include files that were excluded by the user's gitignore.
+	Include []string `json:"include,omitempty"`
+
+	// Exclude contains a list of globs evaluated relative to the bundle root path
+	// to explicitly exclude files that were included by
+	// 1) the default that observes the user's gitignore, or
+	// 2) the `Include` field above.
+	Exclude []string `json:"exclude,omitempty"`
+}
diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go
index 84d79dc8..2dccd20a 100644
--- a/bundle/deploy/files/sync.go
+++ b/bundle/deploy/files/sync.go
@@ -14,9 +14,17 @@ func getSync(ctx context.Context, b *bundle.Bundle) (*sync.Sync, error) {
 		return nil, fmt.Errorf("cannot get bundle cache directory: %w", err)
 	}
 
+	includes, err := b.GetSyncIncludePatterns()
+	if err != nil {
+		return nil, fmt.Errorf("cannot get list of sync includes: %w", err)
+	}
+
 	opts := sync.SyncOptions{
-		LocalPath:  b.Config.Path,
-		RemotePath: b.Config.Workspace.FilesPath,
+		LocalPath:  b.Config.Path,
+		RemotePath: b.Config.Workspace.FilesPath,
+		Include:    includes,
+		Exclude:    b.Config.Sync.Exclude,
+
 		Full:        false,
 		CurrentUser: b.Config.Workspace.CurrentUser.User,
 
diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go
index 2fff7baf..be45626a 100644
--- a/cmd/bundle/sync.go
+++ b/cmd/bundle/sync.go
@@ -23,9 +23,16 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, b *bundle.Bundle)
 		return nil, fmt.Errorf("cannot get bundle cache directory: %w", err)
 	}
 
+	includes, err := b.GetSyncIncludePatterns()
+	if err != nil {
+		return nil, fmt.Errorf("cannot get list of sync includes: %w", err)
+	}
+
 	opts := sync.SyncOptions{
 		LocalPath:  b.Config.Path,
 		RemotePath: b.Config.Workspace.FilesPath,
+		Include:    includes,
+		Exclude:    b.Config.Sync.Exclude,
 		Full:       f.full,
 		PollInterval: f.interval,
 
diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go
index d2aad0c3..4a62123b 100644
--- a/cmd/sync/sync.go
+++ b/cmd/sync/sync.go
@@ -35,9 +35,16 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, args []string, b *
 		return nil, fmt.Errorf("cannot get bundle cache directory: %w", err)
 	}
 
+	includes, err := b.GetSyncIncludePatterns()
+	if err != nil {
+		return nil, fmt.Errorf("cannot get list of sync includes: %w", err)
+	}
+
 	opts := sync.SyncOptions{
 		LocalPath:  b.Config.Path,
 		RemotePath: b.Config.Workspace.FilesPath,
+		Include:    includes,
+		Exclude:    b.Config.Sync.Exclude,
 		Full:       f.full,
 		PollInterval: f.interval,
 
diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go
new file mode 100644
index 00000000..7a9f130b
--- /dev/null
+++ b/libs/fileset/glob.go
@@ -0,0 +1,49 @@
+package fileset
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+)
+
+type GlobSet struct {
+	root     string
+	patterns []string
+}
+
+func NewGlobSet(root string, includes []string) (*GlobSet, error) {
+	absRoot, err := filepath.Abs(root)
+	if err != nil {
+		return nil, err
+	}
+	for k := range includes {
+		includes[k] = filepath.Join(absRoot, filepath.FromSlash(includes[k]))
+	}
+	return &GlobSet{absRoot, includes}, nil
+}
+
+// Return all files which match the defined glob patterns
+func (s *GlobSet) All() ([]File, error) {
+	files := make([]File, 0)
+	for _, pattern := range s.patterns {
+		matches, err := filepath.Glob(pattern)
+		if err != nil {
+			return files, err
+		}
+
+		for _, match := range matches {
+			matchRel, err := filepath.Rel(s.root, match)
+			if err != nil {
+				return files, err
+			}
+
+			stat, err := os.Stat(match)
+			if err != nil {
+				return files, err
+			}
+			files = append(files, File{fs.FileInfoToDirEntry(stat), match, matchRel})
+		}
+	}
+
+	return files, nil
+}
diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go
new file mode 100644
index 00000000..f6ac7e19
--- /dev/null
+++ b/libs/fileset/glob_test.go
@@ -0,0 +1,65 @@
+package fileset
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+	"slices"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGlobFileset(t *testing.T) {
+	cwd, err := os.Getwd()
+	require.NoError(t, err)
+	root := filepath.Join(cwd, "..", "filer")
+
+	entries, err := os.ReadDir(root)
+	require.NoError(t, err)
+
+	g, err := NewGlobSet(root, []string{
+		"./*.go",
+	})
+	require.NoError(t, err)
+
+	files, err := g.All()
+	require.NoError(t, err)
+
+	require.Equal(t, len(files), len(entries))
+	for _, f := range files {
+		exists := slices.ContainsFunc(entries, func(de fs.DirEntry) bool {
+			return de.Name() == f.Name()
+		})
+		require.True(t, exists)
+	}
+
+	g, err = NewGlobSet(root, []string{
+		"./*.js",
+	})
+	require.NoError(t, err)
+
+	files, err = g.All()
+	require.NoError(t, err)
+	require.Equal(t, len(files), 0)
+}
+
+func TestGlobFilesetWithRelativeRoot(t *testing.T) {
+	root := filepath.Join("..", "filer")
+
+	entries, err := os.ReadDir(root)
+	require.NoError(t, err)
+
+	g, err := NewGlobSet(root, []string{
+		"./*.go",
+	})
+	require.NoError(t, err)
+
+	files, err := g.All()
+	require.NoError(t, err)
+
+	require.Equal(t, len(files), len(entries))
+	for _, f := range files {
+		require.True(t, filepath.IsAbs(f.Absolute))
+	}
+}
diff --git a/libs/set/set.go b/libs/set/set.go
new file mode 100644
index 00000000..4798ed09
--- /dev/null
+++ b/libs/set/set.go
@@ -0,0 +1,75 @@
+package set
+
+import (
+	"fmt"
+
+	"golang.org/x/exp/maps"
+)
+
+type hashFunc[T any] func(a T) string
+
+// Set struct represents set data structure
+type Set[T any] struct {
+	key  hashFunc[T]
+	data map[string]T
+}
+
+// NewSetFromF initialises a new set with initial values and a hash function
+// to define uniqueness of a value
+func NewSetFromF[T any](values []T, f hashFunc[T]) *Set[T] {
+	s := &Set[T]{
+		key:  f,
+		data: make(map[string]T),
+	}
+
+	for _, v := range values {
+		s.Add(v)
+	}
+
+	return s
+}
+
+// NewSetF initialises a new empty set with a hash function
+// to define uniqueness of a value
+func NewSetF[T any](f hashFunc[T]) *Set[T] {
+	return NewSetFromF([]T{}, f)
+}
+
+// NewSetFrom initialises a new set with initial values which are comparable
+func NewSetFrom[T comparable](values []T) *Set[T] {
+	return NewSetFromF(values, func(item T) string {
+		return fmt.Sprintf("%v", item)
+	})
+}
+
+// NewSet initialises a new empty set for comparable values
+func NewSet[T comparable]() *Set[T] {
+	return NewSetFrom([]T{})
+}
+
+func (s *Set[T]) addOne(item T) {
+	s.data[s.key(item)] = item
+}
+
+// Add one or multiple items to the set
+func (s *Set[T]) Add(items ...T) {
+	for _, i := range items {
+		s.addOne(i)
+	}
+}
+
+// Remove an item from the set. No-op if the item does not exist.
+func (s *Set[T]) Remove(item T) {
+	delete(s.data, s.key(item))
+}
+
+// Indicates if the item exists in the set
+func (s *Set[T]) Has(item T) bool {
+	_, ok := s.data[s.key(item)]
+	return ok
+}
+
+// Returns an iterable slice of values from the set
+func (s *Set[T]) Iter() []T {
+	return maps.Values(s.data)
+}
diff --git a/libs/set/set_test.go b/libs/set/set_test.go
new file mode 100644
index 00000000..c2b6e25c
--- /dev/null
+++ b/libs/set/set_test.go
@@ -0,0 +1,111 @@
+package set
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestSet(t *testing.T) {
+	s := NewSetFrom([]string{})
+	require.ElementsMatch(t, []string{}, s.Iter())
+
+	s = NewSetFrom([]string{"a", "a", "a", "b", "b", "c", "d", "e"})
+	require.ElementsMatch(t, []string{"a", "b", "c", "d", "e"}, s.Iter())
+
+	i := NewSetFrom([]int{1, 1, 2, 3, 4, 5, 7, 7, 7, 10, 11})
+	require.ElementsMatch(t, []int{1, 2, 3, 4, 5, 7, 10, 11}, i.Iter())
+
+	f := NewSetFrom([]float32{1.1, 1.1, 2.0, 3.1, 4.5, 5.1, 7.1, 7.2, 7.1, 10.1, 11.0})
+	require.ElementsMatch(t, []float32{1.1, 2.0, 3.1, 4.5, 5.1, 7.1, 7.2, 10.1, 11.0}, f.Iter())
+}
+
+type testStruct struct {
+	key   string
+	value int
+}
+
+func TestSetCustomKey(t *testing.T) {
+	s := NewSetF(func(item *testStruct) string {
+		return fmt.Sprintf("%s:%d", item.key, item.value)
+	})
+	s.Add(&testStruct{"a", 1})
+	s.Add(&testStruct{"b", 2})
+	s.Add(&testStruct{"c", 1})
+	s.Add(&testStruct{"a", 1})
+	s.Add(&testStruct{"a", 1})
+	s.Add(&testStruct{"a", 1})
+	s.Add(&testStruct{"c", 1})
+	s.Add(&testStruct{"c", 3})
+
+	require.ElementsMatch(t, []*testStruct{
+		{"a", 1},
+		{"b", 2},
+		{"c", 1},
+		{"c", 3},
+	}, s.Iter())
+}
+
+func TestSetAdd(t *testing.T) {
+	s := NewSet[string]()
+	s.Add("a")
+	s.Add("a")
+	s.Add("a")
+	s.Add("b")
+	s.Add("c")
+	s.Add("c")
+	s.Add("d")
+	s.Add("d")
+
+	require.ElementsMatch(t, []string{"a", "b", "c", "d"}, s.Iter())
+}
+
+func TestSetRemove(t *testing.T) {
+	s := NewSet[string]()
+	s.Add("a")
+	s.Add("a")
+	s.Add("a")
+	s.Add("b")
+	s.Add("c")
+	s.Add("c")
+	s.Add("d")
+	s.Add("d")
+
+	s.Remove("d")
+	s.Remove("d")
+	s.Remove("a")
+
+	require.ElementsMatch(t, []string{"b", "c"}, s.Iter())
+}
+
+func TestSetHas(t *testing.T) {
+	s := NewSet[string]()
+	require.False(t, s.Has("a"))
+
+	s.Add("a")
+	require.True(t, s.Has("a"))
+
+	s.Add("a")
+	s.Add("a")
+	require.True(t, s.Has("a"))
+
+	s.Add("b")
+	s.Add("c")
+	s.Add("c")
+	s.Add("d")
+	s.Add("d")
+
+	require.True(t, s.Has("a"))
+	require.True(t, s.Has("b"))
+	require.True(t, s.Has("c"))
+	require.True(t, s.Has("d"))
+
+	s.Remove("d")
+	s.Remove("a")
+
+	require.False(t, s.Has("a"))
+	require.True(t, s.Has("b"))
+	require.True(t, s.Has("c"))
+	require.False(t, s.Has("d"))
+}
diff --git a/libs/sync/sync.go b/libs/sync/sync.go
index a299214d..8be478fc 100644
--- a/libs/sync/sync.go
+++ b/libs/sync/sync.go
@@ -6,8 +6,10 @@ import (
 	"time"
 
 	"github.com/databricks/cli/libs/filer"
+	"github.com/databricks/cli/libs/fileset"
 	"github.com/databricks/cli/libs/git"
 	"github.com/databricks/cli/libs/log"
+	"github.com/databricks/cli/libs/set"
 	"github.com/databricks/databricks-sdk-go"
 	"github.com/databricks/databricks-sdk-go/service/iam"
 )
@@ -15,6 +17,8 @@ import (
 type SyncOptions struct {
 	LocalPath  string
 	RemotePath string
+	Include    []string
+	Exclude    []string
 
 	Full bool
 
@@ -32,7 +36,10 @@ type SyncOptions struct {
 type Sync struct {
 	*SyncOptions
 
-	fileSet *git.FileSet
+	fileSet        *git.FileSet
+	includeFileSet *fileset.GlobSet
+	excludeFileSet *fileset.GlobSet
+
 	snapshot *Snapshot
 	filer    filer.Filer
 
@@ -52,6 +59,16 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
 		return nil, err
 	}
 
+	includeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Include)
+	if err != nil {
+		return nil, err
+	}
+
+	excludeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Exclude)
+	if err != nil {
+		return nil, err
+	}
+
 	// Verify that the remote path we're about to synchronize to is valid and allowed.
 	err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
 	if err != nil {
@@ -88,11 +105,13 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
 	return &Sync{
 		SyncOptions: &opts,
 
-		fileSet:  fileSet,
-		snapshot: snapshot,
-		filer:    filer,
-		notifier: &NopNotifier{},
-		seq:      0,
+		fileSet:        fileSet,
+		includeFileSet: includeFileSet,
+		excludeFileSet: excludeFileSet,
+		snapshot:       snapshot,
+		filer:          filer,
+		notifier:       &NopNotifier{},
+		seq:            0,
 	}, nil
 }
 
@@ -132,15 +151,12 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) {
 }
 
 func (s *Sync) RunOnce(ctx context.Context) error {
-	// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
-	// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
-	all, err := s.fileSet.All()
+	files, err := getFileList(ctx, s)
 	if err != nil {
-		log.Errorf(ctx, "cannot list files: %s", err)
 		return err
 	}
 
-	change, err := s.snapshot.diff(ctx, all)
+	change, err := s.snapshot.diff(ctx, files)
 	if err != nil {
 		return err
 	}
@@ -166,6 +182,40 @@ func (s *Sync) RunOnce(ctx context.Context) error {
 	return nil
 }
 
+func getFileList(ctx context.Context, s *Sync) ([]fileset.File, error) {
+	// tradeoff: doing portable monitoring only due to macOS max descriptor manual ulimit setting requirement
+	// https://github.com/gorakhargosh/watchdog/blob/master/src/watchdog/observers/kqueue.py#L394-L418
+	all := set.NewSetF(func(f fileset.File) string {
+		return f.Absolute
+	})
+	gitFiles, err := s.fileSet.All()
+	if err != nil {
+		log.Errorf(ctx, "cannot list files: %s", err)
+		return nil, err
+	}
+	all.Add(gitFiles...)
+
+	include, err := s.includeFileSet.All()
+	if err != nil {
+		log.Errorf(ctx, "cannot list include files: %s", err)
+		return nil, err
+	}
+
+	all.Add(include...)
+
+	exclude, err := s.excludeFileSet.All()
+	if err != nil {
+		log.Errorf(ctx, "cannot list exclude files: %s", err)
+		return nil, err
+	}
+
+	for _, f := range exclude {
+		all.Remove(f)
+	}
+
+	return all.Iter(), nil
+}
+
 func (s *Sync) DestroySnapshot(ctx context.Context) error {
 	return s.snapshot.Destroy(ctx)
 }
diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go
new file mode 100644
index 00000000..99c7e04b
--- /dev/null
+++ b/libs/sync/sync_test.go
@@ -0,0 +1,119 @@
+package sync
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/databricks/cli/libs/fileset"
+	"github.com/databricks/cli/libs/git"
+	"github.com/stretchr/testify/require"
+)
+
+func createFile(dir string, name string) error {
+	f, err := os.Create(filepath.Join(dir, name))
+	if err != nil {
+		return err
+	}
+
+	return f.Close()
+}
+
+func setupFiles(t *testing.T) string {
+	dir := t.TempDir()
+
+	err := createFile(dir, "a.go")
+	require.NoError(t, err)
+
+	err = createFile(dir, "b.go")
+	require.NoError(t, err)
+
+	err = createFile(dir, "ab.go")
+	require.NoError(t, err)
+
+	err = createFile(dir, "abc.go")
+	require.NoError(t, err)
+
+	err = createFile(dir, "c.go")
+	require.NoError(t, err)
+
+	err = createFile(dir, "d.go")
+	require.NoError(t, err)
+
+	dbDir := filepath.Join(dir, ".databricks")
+	err = os.Mkdir(dbDir, 0755)
+	require.NoError(t, err)
+
+	err = createFile(dbDir, "e.go")
+	require.NoError(t, err)
+
+	return dir
+
+}
+
+func TestGetFileSet(t *testing.T) {
+	ctx := context.Background()
+
+	dir := setupFiles(t)
+	fileSet, err := git.NewFileSet(dir)
+	require.NoError(t, err)
+
+	err = fileSet.EnsureValidGitIgnoreExists()
+	require.NoError(t, err)
+
+	inc, err := fileset.NewGlobSet(dir, []string{})
+	require.NoError(t, err)
+
+	excl, err := fileset.NewGlobSet(dir, []string{})
+	require.NoError(t, err)
+
+	s := &Sync{
+		SyncOptions: &SyncOptions{},
+
+		fileSet:        fileSet,
+		includeFileSet: inc,
+		excludeFileSet: excl,
+	}
+
+	fileList, err := getFileList(ctx, s)
+	require.NoError(t, err)
+	require.Equal(t, len(fileList), 7)
+
+	inc, err = fileset.NewGlobSet(dir, []string{})
+	require.NoError(t, err)
+
+	excl, err = fileset.NewGlobSet(dir, []string{"*.go"})
+	require.NoError(t, err)
+
+	s = &Sync{
+		SyncOptions: &SyncOptions{},
+
+		fileSet:        fileSet,
+		includeFileSet: inc,
+		excludeFileSet: excl,
+	}
+
+	fileList, err = getFileList(ctx, s)
+	require.NoError(t, err)
+	require.Equal(t, len(fileList), 1)
+
+	inc, err = fileset.NewGlobSet(dir, []string{".databricks/*.*"})
+	require.NoError(t, err)
+
+	excl, err = fileset.NewGlobSet(dir, []string{})
+	require.NoError(t, err)
+
+	s = &Sync{
+		SyncOptions: &SyncOptions{},
+
+		fileSet:        fileSet,
+		includeFileSet: inc,
+		excludeFileSet: excl,
+	}
+
+	fileList, err = getFileList(ctx, s)
+	require.NoError(t, err)
+	require.Equal(t, len(fileList), 8)
+
+}