Support .gitignore syntax in sync section and make sure it works recursively (#854)

Fixes #815
This commit is contained in:
Andrew Nester 2023-10-10 10:45:15 +02:00 committed by GitHub
parent 8131a8b5fa
commit 8c1441ff71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 192 additions and 42 deletions

View File

@ -1,49 +1,22 @@
package fileset
import (
"io/fs"
"os"
"path/filepath"
)
type GlobSet struct {
root string
patterns []string
}
func NewGlobSet(root string, includes []string) (*GlobSet, error) {
func NewGlobSet(root string, includes []string) (*FileSet, error) {
absRoot, err := filepath.Abs(root)
if err != nil {
return nil, err
}
for k := range includes {
includes[k] = filepath.Join(absRoot, filepath.FromSlash(includes[k]))
}
return &GlobSet{absRoot, includes}, nil
}
// Return all files which matches defined glob patterns
func (s *GlobSet) All() ([]File, error) {
files := make([]File, 0)
for _, pattern := range s.patterns {
matches, err := filepath.Glob(pattern)
if err != nil {
return files, err
}
for _, match := range matches {
matchRel, err := filepath.Rel(s.root, match)
if err != nil {
return files, err
}
stat, err := os.Stat(match)
if err != nil {
return files, err
}
files = append(files, File{fs.FileInfoToDirEntry(stat), match, matchRel})
}
includes[k] = filepath.ToSlash(filepath.Clean(includes[k]))
}
return files, nil
fs := &FileSet{
absRoot,
newIncluder(includes),
}
return fs, nil
}

View File

@ -5,6 +5,7 @@ import (
"os"
"path/filepath"
"slices"
"strings"
"testing"
"github.com/stretchr/testify/require"
@ -63,3 +64,96 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) {
require.True(t, filepath.IsAbs(f.Absolute))
}
}
func TestGlobFilesetRecursively(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error {
if !info.IsDir() {
entries = append(entries, path)
}
return nil
})
require.NoError(t, err)
g, err := NewGlobSet(root, []string{
"testdata/*",
})
require.NoError(t, err)
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
}
func TestGlobFilesetDir(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata", "a"), func(path string, info fs.FileInfo, err error) error {
if !info.IsDir() {
entries = append(entries, path)
}
return nil
})
require.NoError(t, err)
g, err := NewGlobSet(root, []string{
"testdata/a",
})
require.NoError(t, err)
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
}
func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) {
cwd, err := os.Getwd()
require.NoError(t, err)
root := filepath.Join(cwd, "..", "git")
entries := make([]string, 0)
err = filepath.Walk(filepath.Join(root, "testdata"), func(path string, info fs.FileInfo, err error) error {
if strings.HasSuffix(path, ".txt") {
entries = append(entries, path)
}
return nil
})
require.NoError(t, err)
g, err := NewGlobSet(root, []string{
"testdata/**/*.txt",
})
require.NoError(t, err)
files, err := g.All()
require.NoError(t, err)
require.Equal(t, len(files), len(entries))
for _, f := range files {
exists := slices.ContainsFunc(entries, func(path string) bool {
return path == f.Absolute
})
require.True(t, exists)
}
}

View File

@ -1,5 +1,9 @@
package fileset
import (
ignore "github.com/sabhiram/go-gitignore"
)
// Ignorer is the interface for what determines if a path
// in the [FileSet] must be ignored or not.
type Ignorer interface {
@ -17,3 +21,37 @@ func (nopIgnorer) IgnoreFile(path string) (bool, error) {
func (nopIgnorer) IgnoreDirectory(path string) (bool, error) {
return false, nil
}
type includer struct {
matcher *ignore.GitIgnore
}
func newIncluder(includes []string) *includer {
matcher := ignore.CompileIgnoreLines(includes...)
return &includer{
matcher,
}
}
func (i *includer) IgnoreFile(path string) (bool, error) {
return i.ignore(path), nil
}
// In the context of 'include' functionality, the Ignorer logic appears to be reversed:
// For patterns like 'foo/bar/' which intends to match directories only, we still need to traverse into the directory for potential file matches.
// Ignoring the directory entirely isn't an option, especially when dealing with patterns like 'foo/bar/*.go'.
// While this pattern doesn't target directories, it does match all Go files within them and ignoring directories not matching the pattern
// Will result in missing file matches.
// During the tree traversal process, we call 'IgnoreDirectory' on ".", "./foo", and "./foo/bar",
// all while applying the 'foo/bar/*.go' pattern. To handle this situation effectively, it requires to make the code more complex.
// This could mean generating various prefix patterns to facilitate the exclusion of directories from traversal.
// It's worth noting that, in this particular case, opting for a simpler logic results in a performance trade-off.
func (i *includer) IgnoreDirectory(path string) (bool, error) {
return false, nil
}
func (i *includer) ignore(path string) bool {
matched := i.matcher.MatchesPath(path)
// If matched, do not ignore the file because we want to include it
return !matched
}

View File

@ -37,8 +37,8 @@ type Sync struct {
*SyncOptions
fileSet *git.FileSet
includeFileSet *fileset.GlobSet
excludeFileSet *fileset.GlobSet
includeFileSet *fileset.FileSet
excludeFileSet *fileset.FileSet
snapshot *Snapshot
filer filer.Filer

View File

@ -48,8 +48,25 @@ func setupFiles(t *testing.T) string {
err = createFile(dbDir, "e.go")
require.NoError(t, err)
return dir
testDir := filepath.Join(dir, "test")
err = os.Mkdir(testDir, 0755)
require.NoError(t, err)
sub1 := filepath.Join(testDir, "sub1")
err = os.Mkdir(sub1, 0755)
require.NoError(t, err)
err = createFile(sub1, "f.go")
require.NoError(t, err)
sub2 := filepath.Join(sub1, "sub2")
err = os.Mkdir(sub2, 0755)
require.NoError(t, err)
err = createFile(sub2, "g.go")
require.NoError(t, err)
return dir
}
func TestGetFileSet(t *testing.T) {
@ -78,7 +95,7 @@ func TestGetFileSet(t *testing.T) {
fileList, err := getFileList(ctx, s)
require.NoError(t, err)
require.Equal(t, len(fileList), 7)
require.Equal(t, len(fileList), 9)
inc, err = fileset.NewGlobSet(dir, []string{})
require.NoError(t, err)
@ -98,7 +115,7 @@ func TestGetFileSet(t *testing.T) {
require.NoError(t, err)
require.Equal(t, len(fileList), 1)
inc, err = fileset.NewGlobSet(dir, []string{".databricks/*.*"})
inc, err = fileset.NewGlobSet(dir, []string{".databricks/*"})
require.NoError(t, err)
excl, err = fileset.NewGlobSet(dir, []string{})
@ -114,6 +131,34 @@ func TestGetFileSet(t *testing.T) {
fileList, err = getFileList(ctx, s)
require.NoError(t, err)
require.Equal(t, len(fileList), 8)
require.Equal(t, len(fileList), 10)
}
func TestRecursiveExclude(t *testing.T) {
ctx := context.Background()
dir := setupFiles(t)
fileSet, err := git.NewFileSet(dir)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()
require.NoError(t, err)
inc, err := fileset.NewGlobSet(dir, []string{})
require.NoError(t, err)
excl, err := fileset.NewGlobSet(dir, []string{"test/**"})
require.NoError(t, err)
s := &Sync{
SyncOptions: &SyncOptions{},
fileSet: fileSet,
includeFileSet: inc,
excludeFileSet: excl,
}
fileList, err := getFileList(ctx, s)
require.NoError(t, err)
require.Equal(t, len(fileList), 7)
}