diff --git a/git/repository.go b/git/repository.go
new file mode 100644
index 00000000..5a5711eb
--- /dev/null
+++ b/git/repository.go
@@ -0,0 +1,218 @@
+package git
+
+import (
+	"fmt"
+	"io/fs"
+	"os"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"github.com/databricks/bricks/folders"
+	ignore "github.com/sabhiram/go-gitignore"
+)
+
+const gitIgnoreFileName = ".gitignore"
+
+// Repository represents a Git repository or a directory
+// that could later be initialized as Git repository.
+type Repository struct {
+	// real indicates if this is a real repository or a non-Git
+	// directory where we process .gitignore files.
+	real bool
+
+	// rootPath is the absolute path to the repository root.
+	rootPath string
+
+	// ignore contains a list of ignore patterns indexed by the
+	// path prefix relative to the repository root.
+	//
+	// Example prefixes: ".", "foo/bar"
+	//
+	// Note: prefixes use the forward slash instead of the
+	// OS-specific path separator. This matches Git convention.
+	ignore map[string][]*ignore.GitIgnore
+}
+
+// includeIgnoreFile compiles the ignore file at relativeIgnoreFilePath
+// (relative to the repository root) and registers its patterns under the
+// prefix relativeTo (also relative to the repository root).
+//
+// A file that cannot be stat-ed or that is a directory is silently skipped.
+func (r *Repository) includeIgnoreFile(relativeIgnoreFilePath, relativeTo string) error {
+	absPath := filepath.Join(r.rootPath, relativeIgnoreFilePath)
+
+	// The file must be stat-able and not a directory.
+	// If it doesn't exist or is a directory, do nothing.
+	stat, err := os.Stat(absPath)
+	if err != nil || stat.IsDir() {
+		return nil
+	}
+
+	compiled, err := ignore.CompileIgnoreFile(absPath)
+	if err != nil {
+		return err
+	}
+
+	// Index by a cleaned, slash-separated prefix so lookups in Ignore match.
+	relativeTo = path.Clean(filepath.ToSlash(relativeTo))
+	r.ignore[relativeTo] = append(r.ignore[relativeTo], compiled)
+	return nil
+}
+
+// includeIgnoreFilesUpToPath includes ignore files in directories that are parent to `relPath`.
+//
+// If equal to "foo/bar" this loads ignore files
+// located at the repository root and in the directory "foo".
+//
+// If equal to "." this function does nothing.
+func (r *Repository) includeIgnoreFilesUpToPath(relPath string) error {
+	// Accumulate list of directories to load ignore file from.
+	dirs := []string{"."}
+	for _, part := range strings.Split(relPath, string(os.PathSeparator)) {
+		dirs = append(dirs, filepath.Join(dirs[len(dirs)-1], part))
+	}
+
+	// Load ignore files.
+	for _, dir := range dirs {
+		// Path equal to `relPath` is loaded by [includeIgnoreFilesUnderPath].
+		if dir == relPath {
+			continue
+		}
+		err := r.includeIgnoreFile(filepath.Join(dir, gitIgnoreFileName), dir)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// includeIgnoreFilesUnderPath includes ignore files in directories that are equal to or nested under `relPath`.
+func (r *Repository) includeIgnoreFilesUnderPath(relPath string) error {
+	absPath := filepath.Join(r.rootPath, relPath)
+	err := filepath.WalkDir(absPath, r.includeIgnoreFilesWalkDirFn)
+	if err != nil {
+		return fmt.Errorf("unable to walk directory: %w", err)
+	}
+	return nil
+}
+
+// includeIgnoreFilesWalkDirFn is called from [filepath.WalkDir] in includeIgnoreFilesUnderPath.
+//
+// It loads every .gitignore file it encounters and does not descend into
+// directories that are ignored by the patterns loaded so far.
+func (r *Repository) includeIgnoreFilesWalkDirFn(absPath string, d fs.DirEntry, err error) error {
+	if err != nil {
+		// If reading the target path fails bubble up the error.
+		if d == nil {
+			return err
+		}
+		// Ignore failure to read paths nested under the target path.
+		return filepath.SkipDir
+	}
+
+	// Get path relative to root path.
+	pathRelativeToRoot, err := filepath.Rel(r.rootPath, absPath)
+	if err != nil {
+		return err
+	}
+
+	// Check if directory is ignored before recursing into it.
+	// The trailing slash marks the path as a directory so that directory-only
+	// patterns (e.g. "build/") match as well. The repository root itself is
+	// never considered ignored.
+	if d.IsDir() && pathRelativeToRoot != "." && r.Ignore(pathRelativeToRoot+"/") {
+		return filepath.SkipDir
+	}
+
+	// Load .gitignore if we find one.
+	if d.Name() == gitIgnoreFileName {
+		err := r.includeIgnoreFile(pathRelativeToRoot, filepath.Dir(pathRelativeToRoot))
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Include ignore files relevant for files nested under `relPath`.
+func (r *Repository) includeIgnoreFilesForPath(relPath string) error {
+	if err := r.includeIgnoreFilesUpToPath(relPath); err != nil {
+		return err
+	}
+	return r.includeIgnoreFilesUnderPath(relPath)
+}
+
+// Ignore computes whether to ignore the specified path.
+// The specified path is relative to the repository root path.
+func (r *Repository) Ignore(relPath string) bool {
+	parts := strings.Split(filepath.ToSlash(relPath), "/")
+
+	// Retain trailing slash for directory patterns.
+	// We know a trailing slash was present if the last element
+	// after splitting is an empty string.
+	trailingSlash := ""
+	if parts[len(parts)-1] == "" {
+		parts = parts[:len(parts)-1]
+		trailingSlash = "/"
+	}
+
+	// Walk over path prefixes to check applicable gitignore files.
+	for i := range parts {
+		prefix := path.Clean(strings.Join(parts[:i], "/"))
+		suffix := path.Clean(strings.Join(parts[i:], "/")) + trailingSlash
+
+		// For this prefix (e.g. ".", or "dir1/dir2") we check if the
+		// suffix is matched in the respective ignore files.
+		matchers, ok := r.ignore[prefix]
+		if !ok {
+			continue
+		}
+		for _, m := range matchers {
+			if m.MatchesPath(suffix) {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// NewRepository returns the repository that contains `path`. If no `.git`
+// is found, `path` itself is treated as a potential repository root.
+func NewRepository(path string) (*Repository, error) {
+	path, err := filepath.Abs(path)
+	if err != nil {
+		return nil, err
+	}
+
+	real := true
+	rootPath, err := folders.FindDirWithLeaf(path, ".git")
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return nil, err
+		}
+		// Cannot find `.git` directory.
+		// Treat the specified path as a potential repository root.
+		real = false
+		rootPath = path
+	}
+
+	repo := &Repository{
+		real:     real,
+		rootPath: rootPath,
+		ignore:   make(map[string][]*ignore.GitIgnore),
+	}
+
+	// Always ignore ".git" directory.
+	repo.ignore["."] = append(repo.ignore["."], ignore.CompileIgnoreLines(".git"))
+
+	// Load repository-wide exclude file; Git names it "$GIT_DIR/info/exclude".
+	if err := repo.includeIgnoreFile(filepath.Join(".git", "info", "exclude"), "."); err != nil {
+		return nil, err
+	}
+
+	return repo, nil
+}
diff --git a/git/repository_test.go b/git/repository_test.go
new file mode 100644
index 00000000..33d52852
--- /dev/null
+++ b/git/repository_test.go
@@ -0,0 +1,28 @@
+package git
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRepository(t *testing.T) {
+	// Load this repository as test.
+	repo, err := NewRepository("..")
+	require.NoError(t, err)
+
+	// Load all .gitignore files in this repository.
+	err = repo.includeIgnoreFilesForPath(".")
+	require.NoError(t, err)
+
+	// Check that top level ignores work.
+	assert.True(t, repo.Ignore(".DS_Store"))
+	assert.True(t, repo.Ignore("foo.pyc"))
+	assert.False(t, repo.Ignore("vendor"))
+	assert.True(t, repo.Ignore("vendor/"))
+
+	// Check that ignores under testdata work.
+	assert.True(t, repo.Ignore(filepath.Join("git", "testdata", "root.ignoreme")))
+}
diff --git a/git/testdata/.gitignore b/git/testdata/.gitignore
new file mode 100644
index 00000000..3d68fc1c
--- /dev/null
+++ b/git/testdata/.gitignore
@@ -0,0 +1,9 @@
+root.*
+/root/foo
+**/root_double
+
+# Don't recurse into this directory.
+/ignorethis
+
+# Directory pattern.
+ignoredirectory/
diff --git a/git/testdata/a/.gitignore b/git/testdata/a/.gitignore
new file mode 100644
index 00000000..718d2c3d
--- /dev/null
+++ b/git/testdata/a/.gitignore
@@ -0,0 +1,2 @@
+a.*
+**/a_double
diff --git a/git/testdata/a/b/.gitignore b/git/testdata/a/b/.gitignore
new file mode 100644
index 00000000..d66d9042
--- /dev/null
+++ b/git/testdata/a/b/.gitignore
@@ -0,0 +1,2 @@
+b.*
+**/b_double
diff --git a/git/testdata/ignorethis/.gitkeep b/git/testdata/ignorethis/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/git/view.go b/git/view.go
new file mode 100644
index 00000000..9a09806d
--- /dev/null
+++ b/git/view.go
@@ -0,0 +1,70 @@
+package git
+
+import (
+	"path/filepath"
+	"strings"
+)
+
+// View represents a view on a directory tree that takes into account
+// all applicable .gitignore files. The directory tree does NOT need
+// to be the repository root.
+//
+// For example: with a repository root at "myrepo", a view can be
+// anchored at "myrepo/someproject" and still respect the ignore
+// rules defined at "myrepo/.gitignore".
+//
+// We use this functionality to synchronize files from a path nested
+// in a repository while respecting the repository's ignore rules.
+type View struct {
+	repo *Repository
+
+	// targetPath is the relative path within the repository we care about.
+	// For example: "." or "a/b".
+	targetPath string
+}
+
+// Ignore computes whether to ignore the specified path.
+// The specified path is relative to the view's target path.
+func (v *View) Ignore(path string) bool {
+	path = filepath.ToSlash(path)
+
+	// Retain trailing slash for directory patterns.
+	// Needs special handling because it is removed by path cleaning.
+	trailingSlash := ""
+	if strings.HasSuffix(path, "/") {
+		trailingSlash = "/"
+	}
+
+	return v.repo.Ignore(filepath.Join(v.targetPath, path) + trailingSlash)
+}
+
+// NewView returns a view anchored at `path`, with the ignore files
+// relevant for that path loaded from the enclosing repository.
+func NewView(path string) (*View, error) {
+	path, err := filepath.Abs(path)
+	if err != nil {
+		return nil, err
+	}
+
+	repo, err := NewRepository(path)
+	if err != nil {
+		return nil, err
+	}
+
+	// Target path must be relative to the repository root path.
+	targetPath, err := filepath.Rel(repo.rootPath, path)
+	if err != nil {
+		return nil, err
+	}
+
+	// Load ignore files relevant for this view's path.
+	err = repo.includeIgnoreFilesForPath(targetPath)
+	if err != nil {
+		return nil, err
+	}
+
+	return &View{
+		repo:       repo,
+		targetPath: targetPath,
+	}, nil
+}
diff --git a/git/view_test.go b/git/view_test.go
new file mode 100644
index 00000000..89a03ccf
--- /dev/null
+++ b/git/view_test.go
@@ -0,0 +1,186 @@
+package git
+
+import (
+	"io"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func copyTestdata(t *testing.T) string {
+	tempDir := t.TempDir()
+
+	// Copy everything under "testdata" to temporary directory.
+	err := filepath.WalkDir("testdata", func(path string, d fs.DirEntry, err error) error {
+		require.NoError(t, err)
+
+		if d.IsDir() {
+			err := os.MkdirAll(filepath.Join(tempDir, path), 0755)
+			require.NoError(t, err)
+			return nil
+		}
+
+		fin, err := os.Open(path)
+		require.NoError(t, err)
+		defer fin.Close()
+
+		fout, err := os.Create(filepath.Join(tempDir, path))
+		require.NoError(t, err)
+		defer fout.Close()
+
+		_, err = io.Copy(fout, fin)
+		require.NoError(t, err)
+		return nil
+	})
+
+	require.NoError(t, err)
+	return filepath.Join(tempDir, "testdata")
+}
+
+func createFakeRepo(t *testing.T) string {
+	absPath := copyTestdata(t)
+
+	// Add .git directory to make it look like a Git repository.
+	err := os.Mkdir(filepath.Join(absPath, ".git"), 0755)
+	require.NoError(t, err)
+	return absPath
+}
+
+func testViewAtRoot(t *testing.T, v *View) {
+	// Check .gitignore at root.
+	assert.True(t, v.Ignore("root.sh"))
+	assert.True(t, v.Ignore("root/foo"))
+	assert.True(t, v.Ignore("root_double"))
+	assert.False(t, v.Ignore("newfile"))
+	assert.True(t, v.Ignore("ignoredirectory/"))
+
+	// Nested .gitignores should not affect root.
+	assert.False(t, v.Ignore("a.sh"))
+
+	// Nested .gitignores should apply in their path.
+	assert.True(t, v.Ignore("a/a.sh"))
+	assert.True(t, v.Ignore("a/whatever/a.sh"))
+
+	// .git must always be ignored.
+	assert.True(t, v.Ignore(".git"))
+}
+
+func TestViewRootInBricksRepo(t *testing.T) {
+	v, err := NewView("./testdata")
+	require.NoError(t, err)
+	testViewAtRoot(t, v)
+}
+
+func TestViewRootInTempRepo(t *testing.T) {
+	v, err := NewView(createFakeRepo(t))
+	require.NoError(t, err)
+	testViewAtRoot(t, v)
+}
+
+func TestViewRootInTempDir(t *testing.T) {
+	v, err := NewView(copyTestdata(t))
+	require.NoError(t, err)
+	testViewAtRoot(t, v)
+}
+
+func testViewAtA(t *testing.T, v *View) {
+	// Inherit .gitignore from root.
+	assert.True(t, v.Ignore("root.sh"))
+	assert.False(t, v.Ignore("root/foo"))
+	assert.True(t, v.Ignore("root_double"))
+	assert.True(t, v.Ignore("ignoredirectory/"))
+
+	// Check current .gitignore
+	assert.True(t, v.Ignore("a.sh"))
+	assert.True(t, v.Ignore("a_double"))
+	assert.False(t, v.Ignore("newfile"))
+
+	// Nested .gitignores should apply in their path.
+	assert.True(t, v.Ignore("b/b.sh"))
+	assert.True(t, v.Ignore("b/whatever/b.sh"))
+}
+
+func TestViewAInBricksRepo(t *testing.T) {
+	v, err := NewView("./testdata/a")
+	require.NoError(t, err)
+	testViewAtA(t, v)
+}
+
+func TestViewAInTempRepo(t *testing.T) {
+	v, err := NewView(filepath.Join(createFakeRepo(t), "a"))
+	require.NoError(t, err)
+	testViewAtA(t, v)
+}
+
+func TestViewAInTempDir(t *testing.T) {
+	// Since this is not a fake repo it should not traverse up the tree.
+	v, err := NewView(filepath.Join(copyTestdata(t), "a"))
+	require.NoError(t, err)
+
+	// Check that this doesn't inherit .gitignore from root.
+	assert.False(t, v.Ignore("root.sh"))
+	assert.False(t, v.Ignore("root/foo"))
+	assert.False(t, v.Ignore("root_double"))
+
+	// Check current .gitignore
+	assert.True(t, v.Ignore("a.sh"))
+	assert.True(t, v.Ignore("a_double"))
+	assert.False(t, v.Ignore("newfile"))
+
+	// Nested .gitignores should apply in their path.
+	assert.True(t, v.Ignore("b/b.sh"))
+	assert.True(t, v.Ignore("b/whatever/b.sh"))
+}
+
+func testViewAtAB(t *testing.T, v *View) {
+	// Inherit .gitignore from root.
+	assert.True(t, v.Ignore("root.sh"))
+	assert.False(t, v.Ignore("root/foo"))
+	assert.True(t, v.Ignore("root_double"))
+	assert.True(t, v.Ignore("ignoredirectory/"))
+
+	// Inherit .gitignore from root/a.
+	assert.True(t, v.Ignore("a.sh"))
+	assert.True(t, v.Ignore("a_double"))
+
+	// Check current .gitignore
+	assert.True(t, v.Ignore("b.sh"))
+	assert.True(t, v.Ignore("b_double"))
+	assert.False(t, v.Ignore("newfile"))
+}
+
+func TestViewABInBricksRepo(t *testing.T) {
+	v, err := NewView("./testdata/a/b")
+	require.NoError(t, err)
+	testViewAtAB(t, v)
+}
+
+func TestViewABInTempRepo(t *testing.T) {
+	v, err := NewView(filepath.Join(createFakeRepo(t), "a", "b"))
+	require.NoError(t, err)
+	testViewAtAB(t, v)
+}
+
+func TestViewABInTempDir(t *testing.T) {
+	// Since this is not a fake repo it should not traverse up the tree.
+	v, err := NewView(filepath.Join(copyTestdata(t), "a", "b"))
+	require.NoError(t, err)
+
+	// Check that this doesn't inherit .gitignore from root.
+	assert.False(t, v.Ignore("root.sh"))
+	assert.False(t, v.Ignore("root/foo"))
+	assert.False(t, v.Ignore("root_double"))
+
+	// Check that this doesn't inherit .gitignore from root/a.
+	assert.False(t, v.Ignore("a.sh"))
+	assert.False(t, v.Ignore("a_double"))
+
+	// Check current .gitignore
+	assert.True(t, v.Ignore("b.sh"))
+	assert.True(t, v.Ignore("b_double"))
+	assert.False(t, v.Ignore("newfile"))
+}