diff --git a/libs/fileset/glob.go b/libs/fileset/glob.go index 7a9f130b..9d8626e5 100644 --- a/libs/fileset/glob.go +++ b/libs/fileset/glob.go @@ -1,49 +1,22 @@ package fileset import ( - "io/fs" - "os" "path/filepath" ) -type GlobSet struct { - root string - patterns []string -} - -func NewGlobSet(root string, includes []string) (*GlobSet, error) { +func NewGlobSet(root string, includes []string) (*FileSet, error) { absRoot, err := filepath.Abs(root) if err != nil { return nil, err } + for k := range includes { - includes[k] = filepath.Join(absRoot, filepath.FromSlash(includes[k])) - } - return &GlobSet{absRoot, includes}, nil -} - -// Return all files which matches defined glob patterns -func (s *GlobSet) All() ([]File, error) { - files := make([]File, 0) - for _, pattern := range s.patterns { - matches, err := filepath.Glob(pattern) - if err != nil { - return files, err - } - - for _, match := range matches { - matchRel, err := filepath.Rel(s.root, match) - if err != nil { - return files, err - } - - stat, err := os.Stat(match) - if err != nil { - return files, err - } - files = append(files, File{fs.FileInfoToDirEntry(stat), match, matchRel}) - } + includes[k] = filepath.ToSlash(filepath.Clean(includes[k])) } - return files, nil + fs := &FileSet{ + absRoot, + newIncluder(includes), + } + return fs, nil } diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index f6ac7e19..e8d3696c 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" "github.com/stretchr/testify/require" @@ -63,3 +64,96 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) { require.True(t, filepath.IsAbs(f.Absolute)) } } + +func TestGlobFilesetRecursively(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { + if !info.IsDir() { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/*", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} + +func TestGlobFilesetDir(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata", "a"), func(path string, info fs.FileInfo, err error) error { + if !info.IsDir() { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/a", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} + +func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) { + cwd, err := os.Getwd() + require.NoError(t, err) + root := filepath.Join(cwd, "..", "git") + + entries := make([]string, 0) + err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error { + if strings.HasSuffix(path, ".txt") { + entries = append(entries, path) + } + return nil + }) + require.NoError(t, err) + + g, err := NewGlobSet(root, []string{ + "testdata/**/*.txt", + }) + require.NoError(t, err) + + files, err := g.All() + require.NoError(t, err) + + require.Equal(t, len(files), len(entries)) + for _, f := range files { + exists := slices.ContainsFunc(entries, func(path string) bool { + return path == f.Absolute + }) + require.True(t, exists) + } +} diff --git a/libs/fileset/ignorer.go b/libs/fileset/ignorer.go index ba066f41..eb87682f 100644 --- a/libs/fileset/ignorer.go +++ b/libs/fileset/ignorer.go @@ -1,5 +1,9 @@ package fileset +import ( + ignore "github.com/sabhiram/go-gitignore" +) + // Ignorer is the interface for what determines if a path // in the [FileSet] must be ignored or not. type Ignorer interface { @@ -17,3 +21,37 @@ func (nopIgnorer) IgnoreFile(path string) (bool, error) { func (nopIgnorer) IgnoreDirectory(path string) (bool, error) { return false, nil } + +type includer struct { + matcher *ignore.GitIgnore +} + +func newIncluder(includes []string) *includer { + matcher := ignore.CompileIgnoreLines(includes...) + return &includer{ + matcher, + } +} + +func (i *includer) IgnoreFile(path string) (bool, error) { + return i.ignore(path), nil +} + +// In the context of 'include' functionality, the Ignorer logic appears to be reversed: +// For patterns like 'foo/bar/' which intends to match directories only, we still need to traverse into the directory for potential file matches. +// Ignoring the directory entirely isn't an option, especially when dealing with patterns like 'foo/bar/*.go'. +// While this pattern doesn't target directories, it does match all Go files within them and ignoring directories not matching the pattern +// Will result in missing file matches. +// During the tree traversal process, we call 'IgnoreDirectory' on ".", "./foo", and "./foo/bar", +// all while applying the 'foo/bar/*.go' pattern. To handle this situation effectively, it requires to make the code more complex. +// This could mean generating various prefix patterns to facilitate the exclusion of directories from traversal. +// It's worth noting that, in this particular case, opting for a simpler logic results in a performance trade-off. +func (i *includer) IgnoreDirectory(path string) (bool, error) { + return false, nil +} + +func (i *includer) ignore(path string) bool { + matched := i.matcher.MatchesPath(path) + // If matched, do not ignore the file because we want to include it + return !matched +} diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 8be478fc..beb3f6a3 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -37,8 +37,8 @@ type Sync struct { *SyncOptions fileSet *git.FileSet - includeFileSet *fileset.GlobSet - excludeFileSet *fileset.GlobSet + includeFileSet *fileset.FileSet + excludeFileSet *fileset.FileSet snapshot *Snapshot filer filer.Filer diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index 99c7e04b..0f1ad61b 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -48,8 +48,25 @@ func setupFiles(t *testing.T) string { err = createFile(dbDir, "e.go") require.NoError(t, err) - return dir + testDir := filepath.Join(dir, "test") + err = os.Mkdir(testDir, 0755) + require.NoError(t, err) + sub1 := filepath.Join(testDir, "sub1") + err = os.Mkdir(sub1, 0755) + require.NoError(t, err) + + err = createFile(sub1, "f.go") + require.NoError(t, err) + + sub2 := filepath.Join(sub1, "sub2") + err = os.Mkdir(sub2, 0755) + require.NoError(t, err) + + err = createFile(sub2, "g.go") + require.NoError(t, err) + + return dir } func TestGetFileSet(t *testing.T) { @@ -78,7 +95,7 @@ func TestGetFileSet(t *testing.T) { fileList, err := getFileList(ctx, s) require.NoError(t, err) - require.Equal(t, len(fileList), 7) + require.Equal(t, len(fileList), 9) inc, err = fileset.NewGlobSet(dir, []string{}) require.NoError(t, err) @@ -98,7 +115,7 @@ func TestGetFileSet(t *testing.T) { require.NoError(t, err) require.Equal(t, len(fileList), 1) - inc, err = fileset.NewGlobSet(dir, []string{".databricks/*.*"}) + inc, err = fileset.NewGlobSet(dir, []string{".databricks/*"}) require.NoError(t, err) excl, err = fileset.NewGlobSet(dir, []string{}) @@ -114,6 +131,34 @@ func TestGetFileSet(t *testing.T) { fileList, err = getFileList(ctx, s) require.NoError(t, err) - require.Equal(t, len(fileList), 8) - + require.Equal(t, len(fileList), 10) +} + +func TestRecursiveExclude(t *testing.T) { + ctx := context.Background() + + dir := setupFiles(t) + fileSet, err := git.NewFileSet(dir) + require.NoError(t, err) + + err = fileSet.EnsureValidGitIgnoreExists() + require.NoError(t, err) + + inc, err := fileset.NewGlobSet(dir, []string{}) + require.NoError(t, err) + + excl, err := fileset.NewGlobSet(dir, []string{"test/**"}) + require.NoError(t, err) + + s := &Sync{ + SyncOptions: &SyncOptions{}, + + fileSet: fileSet, + includeFileSet: inc, + excludeFileSet: excl, + } + + fileList, err := getFileList(ctx, s) + require.NoError(t, err) + require.Equal(t, len(fileList), 7) }