Take into account gitignore rules defined in parent directories (#182)

This change introduces `git.View`.

View represents a view on a directory tree that takes into account all
applicable .gitignore files. The directory tree does NOT need to be the
repository root.

For example: with a repository root at "myrepo", a view can be anchored
at "myrepo/someproject" and still respect the ignore rules defined at
"myrepo/.gitignore".

We use this functionality to synchronize files from a path nested in a
repository while respecting the repository's ignore rules.

Co-authored-by: Serge Smertin <259697+nfx@users.noreply.github.com>
This commit is contained in:
Pieter Noordhuis 2023-01-27 14:54:28 +01:00 committed by GitHub
parent df7b341afe
commit 2eb10800a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 503 additions and 0 deletions

208
git/repository.go Normal file
View File

@ -0,0 +1,208 @@
package git
import (
"fmt"
"io/fs"
"os"
"path"
"path/filepath"
"strings"
"github.com/databricks/bricks/folders"
ignore "github.com/sabhiram/go-gitignore"
)
// gitIgnoreFileName is the base name of the per-directory ignore files
// that are looked up and compiled into ignore rules.
const gitIgnoreFileName = ".gitignore"
// Repository represents a Git repository or a directory
// that could later be initialized as Git repository.
//
// It holds the compiled ignore rules that have been loaded so far;
// Ignore consults them to decide whether a path relative to rootPath
// is ignored.
type Repository struct {
// real indicates if this is a real repository or a non-Git
// directory where we process .gitignore files.
// NewRepository sets it to false when no ".git" could be located
// and the specified path is used as the root instead.
real bool
// rootPath is the absolute path to the repository root.
// Relative paths handled by this type are resolved against it.
rootPath string
// ignore contains a list of ignore patterns indexed by the
// path prefix relative to the repository root.
//
// Example prefixes: ".", "foo/bar"
//
// Note: prefixes use the forward slash instead of the
// OS-specific path separator. This matches Git convention.
//
// A prefix can hold several sets of patterns. A path is ignored as
// soon as any set matches the part of the path below its prefix.
ignore map[string][]*ignore.GitIgnore
}
// includeIgnoreFile compiles the ignore file at relativeIgnoreFilePath
// (relative to the repository root) and registers its patterns under the
// prefix relativeTo.
//
// A path that cannot be stat'ed, or that is a directory, is skipped without
// error. An error is returned only if compiling the file fails.
func (r *Repository) includeIgnoreFile(relativeIgnoreFilePath, relativeTo string) error {
	absPath := filepath.Join(r.rootPath, relativeIgnoreFilePath)

	// Anything that is missing, unreadable or a directory is a no-op.
	info, statErr := os.Stat(absPath)
	if statErr != nil {
		return nil
	}
	if info.IsDir() {
		return nil
	}

	compiled, err := ignore.CompileIgnoreFile(absPath)
	if err != nil {
		return err
	}

	// Prefixes are keyed with forward slashes, independent of the OS.
	key := path.Clean(filepath.ToSlash(relativeTo))
	r.ignore[key] = append(r.ignore[key], compiled)
	return nil
}
// includeIgnoreFilesUpToPath loads the ignore files located in the
// directories that are parents of relPath.
//
// For "foo/bar" this loads the ignore files at the repository root and in
// the directory "foo". For "." nothing is loaded.
func (r *Repository) includeIgnoreFilesUpToPath(relPath string) error {
	// Descend from the repository root one path element at a time and
	// remember every directory visited on the way, the root included.
	current := "."
	dirs := []string{current}
	for _, elem := range strings.Split(relPath, string(os.PathSeparator)) {
		current = filepath.Join(current, elem)
		dirs = append(dirs, current)
	}

	for _, dir := range dirs {
		// The directory equal to relPath itself is handled by
		// [includeIgnoreFilesUnderPath].
		if dir == relPath {
			continue
		}
		if err := r.includeIgnoreFile(filepath.Join(dir, gitIgnoreFileName), dir); err != nil {
			return err
		}
	}
	return nil
}
// includeIgnoreFilesUnderPath loads the ignore files found in relPath itself
// and in the directories nested under it, by walking the directory tree.
//
// A failure of the walk is returned wrapped in an "unable to walk directory"
// error.
func (r *Repository) includeIgnoreFilesUnderPath(relPath string) error {
	walkRoot := filepath.Join(r.rootPath, relPath)
	if err := filepath.WalkDir(walkRoot, r.includeIgnoreFilesWalkDirFn); err != nil {
		return fmt.Errorf("unable to walk directory: %w", err)
	}
	return nil
}
// includeIgnoreFilesWalkDirFn is called from [filepath.WalkDir] in includeIgnoreFilesUnderPath.
//
// For every visited entry it:
//   - skips directories that are already ignored by the rules loaded so far,
//   - loads every file named ".gitignore" and registers its rules under the
//     directory that contains it.
//
// An error is returned if the walk target itself cannot be read, if a path
// cannot be made relative to the repository root, or if an ignore file fails
// to compile. Unreadable directories below the target are skipped.
func (r *Repository) includeIgnoreFilesWalkDirFn(absPath string, d fs.DirEntry, err error) error {
	if err != nil {
		// If reading the target path fails bubble up the error.
		if d == nil {
			return err
		}
		// Ignore failure to read paths nested under the target path.
		return filepath.SkipDir
	}

	// Get path relative to root path.
	pathRelativeToRoot, err := filepath.Rel(r.rootPath, absPath)
	if err != nil {
		return err
	}

	// Check if directory is ignored before recursing into it.
	if d.IsDir() {
		// Directory-only patterns (for example "node_modules/") match only
		// paths that end in a slash, so the entry is queried as a directory.
		// The repository root keeps its plain "." form.
		query := pathRelativeToRoot
		if query != "." {
			query += "/"
		}
		if r.Ignore(query) {
			return filepath.SkipDir
		}
	}

	// Load .gitignore if we find one.
	if d.Name() == gitIgnoreFileName {
		if err := r.includeIgnoreFile(pathRelativeToRoot, filepath.Dir(pathRelativeToRoot)); err != nil {
			return err
		}
	}
	return nil
}
// includeIgnoreFilesForPath loads every ignore file that is relevant for
// files nested under relPath: first the ones in its parent directories,
// then the ones in relPath and below.
func (r *Repository) includeIgnoreFilesForPath(relPath string) error {
	if err := r.includeIgnoreFilesUpToPath(relPath); err != nil {
		return err
	}
	return r.includeIgnoreFilesUnderPath(relPath)
}
// Ignore reports whether the specified path is ignored by any of the
// ignore rules loaded so far.
//
// The specified path is relative to the repository root path. A trailing
// slash marks the path as a directory and is kept when matching, so that
// directory-only patterns can apply.
func (r *Repository) Ignore(relPath string) bool {
	segments := strings.Split(filepath.ToSlash(relPath), "/")

	// An empty last segment means the path ended in a slash. Drop the
	// segment and remember to re-append the slash to every suffix.
	dirSuffix := ""
	if last := len(segments) - 1; segments[last] == "" {
		segments = segments[:last]
		dirSuffix = "/"
	}

	// Split the path at every position into a prefix (e.g. "." or
	// "dir1/dir2") and the remainder below it, and match the remainder
	// against the rules registered for that prefix.
	for split := 0; split < len(segments); split++ {
		prefix := path.Clean(strings.Join(segments[:split], "/"))
		rules, found := r.ignore[prefix]
		if !found {
			continue
		}

		suffix := path.Clean(strings.Join(segments[split:], "/")) + dirSuffix
		for _, rule := range rules {
			if rule.MatchesPath(suffix) {
				return true
			}
		}
	}
	return false
}
// NewRepository returns a Repository for the specified path.
//
// The repository root is the directory reported by folders.FindDirWithLeaf
// for the ".git" leaf. If that lookup fails with a "does not exist" error,
// the specified path itself is used as root and the repository is marked as
// not real. Any other lookup error is returned.
//
// The returned repository always ignores ".git" and includes the rules of
// the repository-wide exclude file, if present. Per-directory .gitignore
// files are not loaded here.
func NewRepository(path string) (*Repository, error) {
	path, err := filepath.Abs(path)
	if err != nil {
		return nil, err
	}

	real := true
	rootPath, err := folders.FindDirWithLeaf(path, ".git")
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		// Cannot find `.git` directory.
		// Treat the specified path as a potential repository root.
		real = false
		rootPath = path
	}

	repo := &Repository{
		real:     real,
		rootPath: rootPath,
		ignore:   make(map[string][]*ignore.GitIgnore),
	}

	// Always ignore ".git" directory.
	repo.ignore["."] = append(repo.ignore["."], ignore.CompileIgnoreLines(".git"))

	// Load repository-wide exclude file. Git reads "$GIT_DIR/info/exclude".
	// The name "excludes" is what this function looked for before and is
	// still honored. Files that do not exist are skipped by includeIgnoreFile.
	for _, name := range []string{"exclude", "excludes"} {
		if err := repo.includeIgnoreFile(filepath.Join(".git", "info", name), "."); err != nil {
			return nil, err
		}
	}

	return repo, nil
}

28
git/repository_test.go Normal file
View File

@ -0,0 +1,28 @@
package git
import (
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRepository loads the repository this package is part of and checks a
// handful of paths against its ignore rules.
func TestRepository(t *testing.T) {
	// Load this repository as test.
	repo, err := NewRepository("..")
	require.NoError(t, err)

	// Load all .gitignore files in this repository.
	require.NoError(t, repo.includeIgnoreFilesForPath("."))

	cases := []struct {
		path    string
		ignored bool
	}{
		// Top level ignores.
		{".DS_Store", true},
		{"foo.pyc", true},
		{"vendor", false},
		{"vendor/", true},
		// Ignores under testdata.
		{filepath.Join("git", "testdata", "root.ignoreme"), true},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.ignored, repo.Ignore(tc.path), "path: %q", tc.path)
	}
}

9
git/testdata/.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
root.*
/root/foo
**/root_double
# Don't recurse into this directory.
/ignorethis
# Directory pattern.
ignoredirectory/

2
git/testdata/a/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
a.*
**/a_double

2
git/testdata/a/b/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
b.*
**/b_double

0
git/testdata/ignorethis/.gitkeep vendored Normal file
View File

68
git/view.go Normal file
View File

@ -0,0 +1,68 @@
package git
import (
"path/filepath"
"strings"
)
// View represents a view on a directory tree that takes into account
// all applicable .gitignore files. The directory tree does NOT need
// to be the repository root.
//
// For example: with a repository root at "myrepo", a view can be
// anchored at "myrepo/someproject" and still respect the ignore
// rules defined at "myrepo/.gitignore".
//
// We use this functionality to synchronize files from a path nested
// in a repository while respecting the repository's ignore rules.
//
// Use NewView to create a View; it loads the ignore files relevant
// for the view's target path.
type View struct {
// repo is the repository that contains the target path and holds
// the ignore rules that Ignore delegates to.
repo *Repository
// targetPath is the relative path within the repository we care about.
// For example: "." or "a/b".
// Paths passed to Ignore are joined onto it before they are checked.
targetPath string
}
// Ignore reports whether the specified path is ignored.
//
// The specified path is relative to the view's target path. It is joined
// onto the target path and checked against the repository's ignore rules.
// A trailing slash marks the path as a directory.
func (v *View) Ignore(path string) bool {
	slashed := filepath.ToSlash(path)
	joined := filepath.Join(v.targetPath, slashed)

	// Joining cleans the path and thereby drops a trailing slash. Put it
	// back so that directory patterns keep working.
	if strings.HasSuffix(slashed, "/") {
		joined += "/"
	}
	return v.repo.Ignore(joined)
}
// NewView returns a View anchored at the specified path.
//
// It creates the Repository for that path, computes the path relative to
// the repository root, and loads the ignore files that are relevant for it.
// Any error encountered along the way is returned.
func NewView(path string) (*View, error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return nil, err
	}

	repo, err := NewRepository(absPath)
	if err != nil {
		return nil, err
	}

	// The view is addressed relative to the repository root path.
	relToRoot, err := filepath.Rel(repo.rootPath, absPath)
	if err != nil {
		return nil, err
	}

	// Load ignore files relevant for this view's path.
	if err := repo.includeIgnoreFilesForPath(relToRoot); err != nil {
		return nil, err
	}

	view := &View{
		repo:       repo,
		targetPath: relToRoot,
	}
	return view, nil
}

186
git/view_test.go Normal file
View File

@ -0,0 +1,186 @@
package git
import (
"io"
"io/fs"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// copyTestdata copies everything under "testdata" into a fresh temporary
// directory and returns the path of the copied "testdata" directory.
//
// Any failure while walking or copying fails the test immediately.
func copyTestdata(t *testing.T) string {
	t.Helper()
	tempDir := t.TempDir()

	// Copy everything under "testdata" to temporary directory.
	err := filepath.WalkDir("testdata", func(path string, d fs.DirEntry, err error) error {
		require.NoError(t, err)

		if d.IsDir() {
			err := os.MkdirAll(filepath.Join(tempDir, path), 0755)
			require.NoError(t, err)
			return nil
		}

		fin, err := os.Open(path)
		require.NoError(t, err)
		defer fin.Close()

		fout, err := os.Create(filepath.Join(tempDir, path))
		require.NoError(t, err)

		// Close the destination before asserting, so the handle is released
		// even when the copy failed. A failed Close on a written file means
		// the data may not have reached disk, so it is checked as well.
		_, copyErr := io.Copy(fout, fin)
		closeErr := fout.Close()
		require.NoError(t, copyErr)
		require.NoError(t, closeErr)
		return nil
	})
	require.NoError(t, err)

	return filepath.Join(tempDir, "testdata")
}
// createFakeRepo copies the test data to a temporary directory and adds an
// empty ".git" directory to it, to make it look like a Git repository.
// It returns the path of the copy.
func createFakeRepo(t *testing.T) string {
	repoRoot := copyTestdata(t)
	require.NoError(t, os.Mkdir(filepath.Join(repoRoot, ".git"), 0755))
	return repoRoot
}
// testViewAtRoot checks the expectations for a view that is anchored at the
// root of the test data.
func testViewAtRoot(t *testing.T, v *View) {
	cases := []struct {
		path    string
		ignored bool
	}{
		// Check .gitignore at root.
		{"root.sh", true},
		{"root/foo", true},
		{"root_double", true},
		{"newfile", false},
		{"ignoredirectory/", true},
		// Nested .gitignores should not affect root.
		{"a.sh", false},
		// Nested .gitignores should apply in their path.
		{"a/a.sh", true},
		{"a/whatever/a.sh", true},
		// .git must always be ignored.
		{".git", true},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.ignored, v.Ignore(tc.path), "path: %q", tc.path)
	}
}
// TestViewRootInBricksRepo anchors a view at the test data in place, i.e.
// inside the working tree the tests run from.
func TestViewRootInBricksRepo(t *testing.T) {
	view, err := NewView("./testdata")
	require.NoError(t, err)

	testViewAtRoot(t, view)
}
func TestViewRootInTempRepo(t *testing.T) {
v, err := NewView(createFakeRepo(t))
require.NoError(t, err)
testViewAtRoot(t, v)
}
func TestViewRootInTempDir(t *testing.T) {
v, err := NewView(copyTestdata(t))
require.NoError(t, err)
testViewAtRoot(t, v)
}
// testViewAtA checks the expectations for a view that is anchored at the
// directory "a" of the test data, with the test data root as repository root.
func testViewAtA(t *testing.T, v *View) {
	cases := []struct {
		path    string
		ignored bool
	}{
		// Inherit .gitignore from root.
		{"root.sh", true},
		{"root/foo", false},
		{"root_double", true},
		{"ignoredirectory/", true},
		// Check current .gitignore
		{"a.sh", true},
		{"a_double", true},
		{"newfile", false},
		// Nested .gitignores should apply in their path.
		{"b/b.sh", true},
		{"b/whatever/b.sh", true},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.ignored, v.Ignore(tc.path), "path: %q", tc.path)
	}
}
// TestViewAInBricksRepo anchors a view at "testdata/a" in place, i.e. inside
// the working tree the tests run from.
func TestViewAInBricksRepo(t *testing.T) {
	view, err := NewView("./testdata/a")
	require.NoError(t, err)

	testViewAtA(t, view)
}
func TestViewAInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a"))
require.NoError(t, err)
testViewAtA(t, v)
}
func TestViewAInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a"))
require.NoError(t, err)
// Check that this doesn't inherit .gitignore from root.
assert.False(t, v.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo"))
assert.False(t, v.Ignore("root_double"))
// Check current .gitignore
assert.True(t, v.Ignore("a.sh"))
assert.True(t, v.Ignore("a_double"))
assert.False(t, v.Ignore("newfile"))
// Nested .gitignores should apply in their path.
assert.True(t, v.Ignore("b/b.sh"))
assert.True(t, v.Ignore("b/whatever/b.sh"))
}
// testViewAtAB checks the expectations for a view that is anchored at the
// directory "a/b" of the test data, with the test data root as repository
// root.
func testViewAtAB(t *testing.T, v *View) {
	cases := []struct {
		path    string
		ignored bool
	}{
		// Inherit .gitignore from root.
		{"root.sh", true},
		{"root/foo", false},
		{"root_double", true},
		{"ignoredirectory/", true},
		// Inherit .gitignore from root/a.
		{"a.sh", true},
		{"a_double", true},
		// Check current .gitignore
		{"b.sh", true},
		{"b_double", true},
		{"newfile", false},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.ignored, v.Ignore(tc.path), "path: %q", tc.path)
	}
}
// TestViewABInBricksRepo anchors a view at "testdata/a/b" in place, i.e.
// inside the working tree the tests run from.
func TestViewABInBricksRepo(t *testing.T) {
	view, err := NewView("./testdata/a/b")
	require.NoError(t, err)

	testViewAtAB(t, view)
}
func TestViewABInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a", "b"))
require.NoError(t, err)
testViewAtAB(t, v)
}
func TestViewABInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a", "b"))
require.NoError(t, err)
// Check that this doesn't inherit .gitignore from root.
assert.False(t, v.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo"))
assert.False(t, v.Ignore("root_double"))
// Check that this doesn't inherit .gitignore from root/a.
assert.False(t, v.Ignore("a.sh"))
assert.False(t, v.Ignore("a_double"))
// Check current .gitignore
assert.True(t, v.Ignore("b.sh"))
assert.True(t, v.Ignore("b_double"))
assert.False(t, v.Ignore("newfile"))
}