Reload .gitignore files if they have changed (#190)

This commit changes the code in repository.go to lazily load gitignore
files as opposed to the previous eager approach. This means that the
signature of the `Ignore` function family has changed to return `(bool,
error)`.

This lazy approach fits better when other code is responsible for
recursively walking the file tree, because we never know up front which
gitignore files need to be loaded to compute the ignores. It also means
we no longer have to "prime" the `Repository` instance with a particular
directory we're interested in and rather let calls to `Ignore` load
whatever is needed.

The fileset wrapper under `git/` internally taints all gitignore objects
to force a call to [os.Stat] followed by a reload if they have changed,
before calling into the [fileset.FileSet] functions for recursively
listing files.
This commit is contained in:
Pieter Noordhuis 2023-01-31 18:34:36 +01:00 committed by GitHub
parent 6737af4b06
commit a7bf7ba6c5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 416 additions and 221 deletions

View File

@ -7,40 +7,73 @@ import (
"github.com/databricks/bricks/libs/fileset" "github.com/databricks/bricks/libs/fileset"
) )
// Retuns FileSet for the git repo located at `root` // FileSet is Git repository aware implementation of [fileset.FileSet].
func NewFileSet(root string) (*fileset.FileSet, error) { // It forces checking if gitignore files have been modified every
w := fileset.New(root) // time a call to [FileSet.All] or [FileSet.RecursiveListFiles] is made.
type FileSet struct {
fileset *fileset.FileSet
view *View
}
// NewFileSet returns [FileSet] for the Git repository located at `root`.
func NewFileSet(root string) (*FileSet, error) {
fs := fileset.New(root)
v, err := NewView(root) v, err := NewView(root)
if err != nil { if err != nil {
return nil, err return nil, err
} }
w.SetIgnorer(v) fs.SetIgnorer(v)
return w, nil return &FileSet{
fileset: fs,
view: v,
}, nil
}
func (f *FileSet) IgnoreFile(file string) (bool, error) {
return f.view.IgnoreFile(file)
}
func (f *FileSet) IgnoreDirectory(dir string) (bool, error) {
return f.view.IgnoreDirectory(dir)
}
func (f *FileSet) Root() string {
return f.fileset.Root()
}
func (f *FileSet) All() ([]fileset.File, error) {
f.view.repo.taintIgnoreRules()
return f.fileset.All()
}
func (f *FileSet) RecursiveListFiles(dir string) ([]fileset.File, error) {
f.view.repo.taintIgnoreRules()
return f.fileset.RecursiveListFiles(dir)
} }
// Only call this function for a bricks project root // Only call this function for a bricks project root
// since it will create a .gitignore file if missing // since it will create a .gitignore file if missing
func EnsureValidGitIgnoreExists(w *fileset.FileSet) error { func (f *FileSet) EnsureValidGitIgnoreExists() error {
if w.Ignorer().IgnoreDirectory(".databricks") { ign, err := f.view.IgnoreDirectory(".databricks")
if err != nil {
return err
}
if ign {
return nil return nil
} }
gitIgnorePath := filepath.Join(w.Root(), ".gitignore") gitIgnorePath := filepath.Join(f.fileset.Root(), ".gitignore")
f, err := os.OpenFile(gitIgnorePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644) file, err := os.OpenFile(gitIgnorePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
if err != nil { if err != nil {
return err return err
} }
defer f.Close() defer file.Close()
_, err = f.WriteString("\n.databricks\n")
_, err = file.WriteString("\n.databricks\n")
if err != nil { if err != nil {
return err return err
} }
// Reload view to update ignore rules. f.view.repo.taintIgnoreRules()
v, err := NewView(w.Root())
if err != nil {
return err
}
w.SetIgnorer(v)
return nil return nil
} }

111
git/ignore.go Normal file
View File

@ -0,0 +1,111 @@
package git
import (
"os"
"time"
ignore "github.com/sabhiram/go-gitignore"
)
// ignoreRules implements the interface for a gitignore-like file.
// It is backed by implementations for a file at a specific path (it may not exist)
// and an implementation for a set of in-memory ignore patterns.
type ignoreRules interface {
MatchesPath(path string) (bool, error)
// Taint forces checking if the underlying file needs to be reloaded.
// It checks the mtime of the file to see if has been modified after loading.
Taint()
}
// ignoreFile represents a gitignore file backed by a path.
// If the path doesn't exist (yet), it is treated as an empty file.
type ignoreFile struct {
absPath string
// Signal a reload of this file.
// Set this to call [os.Stat] and a potential reload
// of the file's contents next time it is used.
checkForReload bool
// Modified time for this file.
modTime time.Time
// Ignore patterns contained in this file.
patterns *ignore.GitIgnore
}
func newIgnoreFile(absPath string) ignoreRules {
return &ignoreFile{
absPath: absPath,
checkForReload: true,
}
}
func (f *ignoreFile) MatchesPath(path string) (bool, error) {
if f.checkForReload {
err := f.load()
if err != nil {
return false, err
}
// Don't check again in next call.
f.checkForReload = false
}
// A file that doesn't exist doesn't have ignore patterns.
if f.patterns == nil {
return false, nil
}
return f.patterns.MatchesPath(path), nil
}
func (f *ignoreFile) Taint() {
f.checkForReload = true
}
func (f *ignoreFile) load() error {
// The file must be stat-able.
// If it doesn't exist, treat it as an empty file.
stat, err := os.Stat(f.absPath)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
// If the underlying file has not been modified
// it does not need to be reloaded.
if stat.ModTime() == f.modTime {
return nil
}
f.modTime = stat.ModTime()
f.patterns, err = ignore.CompileIgnoreFile(f.absPath)
if err != nil {
return err
}
return nil
}
// stringIgnoreRules implements the [ignoreRules] interface
// for a set of in-memory ignore patterns.
type stringIgnoreRules struct {
patterns *ignore.GitIgnore
}
func newStringIgnoreRules(patterns []string) ignoreRules {
return &stringIgnoreRules{
patterns: ignore.CompileIgnoreLines(patterns...),
}
}
func (r *stringIgnoreRules) MatchesPath(path string) (bool, error) {
return r.patterns.MatchesPath(path), nil
}
func (r *stringIgnoreRules) Taint() {
// Tainting in-memory ignore patterns is a nop.
}

76
git/ignore_test.go Normal file
View File

@ -0,0 +1,76 @@
package git
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestIgnoreFile(t *testing.T) {
var ign bool
var err error
f := newIgnoreFile("./testdata/.gitignore")
ign, err = f.MatchesPath("root.foo")
require.NoError(t, err)
assert.True(t, ign)
ign, err = f.MatchesPath("i'm included")
require.NoError(t, err)
assert.False(t, ign)
}
func TestIgnoreFileDoesntExist(t *testing.T) {
var ign bool
var err error
// Files that don't exist are treated as an empty gitignore file.
f := newIgnoreFile("./testdata/thispathdoesntexist")
ign, err = f.MatchesPath("i'm included")
require.NoError(t, err)
assert.False(t, ign)
}
func TestIgnoreFileTaint(t *testing.T) {
var ign bool
var err error
tempDir := t.TempDir()
gitIgnorePath := filepath.Join(tempDir, ".gitignore")
// Files that don't exist are treated as an empty gitignore file.
f := newIgnoreFile(gitIgnorePath)
ign, err = f.MatchesPath("hello")
require.NoError(t, err)
assert.False(t, ign)
// Now create the .gitignore file.
err = os.WriteFile(gitIgnorePath, []byte("hello"), 0644)
require.NoError(t, err)
// Verify that the match still doesn't happen (no spontaneous reload).
ign, err = f.MatchesPath("hello")
require.NoError(t, err)
assert.False(t, ign)
// Now taint the file to force a reload and verify that the match does happen.
f.Taint()
ign, err = f.MatchesPath("hello")
require.NoError(t, err)
assert.True(t, ign)
}
func TestStringIgnoreRules(t *testing.T) {
var ign bool
var err error
f := newStringIgnoreRules([]string{"hello"})
ign, err = f.MatchesPath("hello")
require.NoError(t, err)
assert.True(t, ign)
ign, err = f.MatchesPath("world")
require.NoError(t, err)
assert.False(t, ign)
}

View File

@ -1,15 +1,12 @@
package git package git
import ( import (
"fmt"
"io/fs"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/databricks/bricks/folders" "github.com/databricks/bricks/folders"
ignore "github.com/sabhiram/go-gitignore"
) )
const gitIgnoreFileName = ".gitignore" const gitIgnoreFileName = ".gitignore"
@ -31,114 +28,42 @@ type Repository struct {
// //
// Note: prefixes use the forward slash instead of the // Note: prefixes use the forward slash instead of the
// OS-specific path separator. This matches Git convention. // OS-specific path separator. This matches Git convention.
ignore map[string][]*ignore.GitIgnore ignore map[string][]ignoreRules
} }
func (r *Repository) includeIgnoreFile(relativeIgnoreFilePath, relativeTo string) error { // newIgnoreFile constructs a new [ignoreRules] implementation backed by
absPath := filepath.Join(r.rootPath, relativeIgnoreFilePath) // a file using the specified path relative to the repository root.
func (r *Repository) newIgnoreFile(relativeIgnoreFilePath string) ignoreRules {
// The file must be stat-able and not a directory. return newIgnoreFile(filepath.Join(r.rootPath, relativeIgnoreFilePath))
// If it doesn't exist or is a directory, do nothing.
stat, err := os.Stat(absPath)
if err != nil || stat.IsDir() {
return nil
}
ignore, err := ignore.CompileIgnoreFile(absPath)
if err != nil {
return err
}
relativeTo = path.Clean(filepath.ToSlash(relativeTo))
r.ignore[relativeTo] = append(r.ignore[relativeTo], ignore)
return nil
} }
// Include ignore files in directories that are parent to `relPath`. // getIgnoreRules returns a slice of [ignoreRules] that apply
// // for the specified prefix. The prefix must be cleaned by the caller.
// If equal to "foo/bar" this loads ignore files // It lazily initializes an entry for the specified prefix if it
// located at the repository root and in the directory "foo". // doesn't yet exist.
// func (r *Repository) getIgnoreRules(prefix string) []ignoreRules {
// If equal to "." this function does nothing. fs, ok := r.ignore[prefix]
func (r *Repository) includeIgnoreFilesUpToPath(relPath string) error { if ok {
// Accumulate list of directories to load ignore file from. return fs
paths := []string{
".",
}
for _, path := range strings.Split(relPath, string(os.PathSeparator)) {
paths = append(paths, filepath.Join(paths[len(paths)-1], path))
} }
// Load ignore files. r.ignore[prefix] = append(r.ignore[prefix], r.newIgnoreFile(filepath.Join(prefix, gitIgnoreFileName)))
for _, path := range paths { return r.ignore[prefix]
// Path equal to `relPath` is loaded by [includeIgnoreFilesUnderPath]. }
if path == relPath {
continue // taintIgnoreRules taints all ignore rules such that the underlying files
} // are checked for modification next time they are needed.
err := r.includeIgnoreFile(filepath.Join(path, gitIgnoreFileName), path) func (r *Repository) taintIgnoreRules() {
if err != nil { for _, fs := range r.ignore {
return err for _, f := range fs {
f.Taint()
} }
} }
return nil
}
// Include ignore files in directories that are equal to or nested under `relPath`.
func (r *Repository) includeIgnoreFilesUnderPath(relPath string) error {
absPath := filepath.Join(r.rootPath, relPath)
err := filepath.WalkDir(absPath, r.includeIgnoreFilesWalkDirFn)
if err != nil {
return fmt.Errorf("unable to walk directory: %w", err)
}
return nil
}
// includeIgnoreFilesWalkDirFn is called from [filepath.WalkDir] in includeIgnoreFilesUnderPath.
func (r *Repository) includeIgnoreFilesWalkDirFn(absPath string, d fs.DirEntry, err error) error {
if err != nil {
// If reading the target path fails bubble up the error.
if d == nil {
return err
}
// Ignore failure to read paths nested under the target path.
return filepath.SkipDir
}
// Get path relative to root path.
pathRelativeToRoot, err := filepath.Rel(r.rootPath, absPath)
if err != nil {
return err
}
// Check if directory is ignored before recursing into it.
if d.IsDir() && r.Ignore(pathRelativeToRoot) {
return filepath.SkipDir
}
// Load .gitignore if we find one.
if d.Name() == gitIgnoreFileName {
err := r.includeIgnoreFile(pathRelativeToRoot, filepath.Dir(pathRelativeToRoot))
if err != nil {
return err
}
}
return nil
}
// Include ignore files relevant for files nested under `relPath`.
func (r *Repository) includeIgnoreFilesForPath(relPath string) error {
err := r.includeIgnoreFilesUpToPath(relPath)
if err != nil {
return err
}
return r.includeIgnoreFilesUnderPath(relPath)
} }
// Ignore computes whether to ignore the specified path. // Ignore computes whether to ignore the specified path.
// The specified path is relative to the repository root path. // The specified path is relative to the repository root path.
func (r *Repository) Ignore(relPath string) bool { func (r *Repository) Ignore(relPath string) (bool, error) {
parts := strings.Split(filepath.ToSlash(relPath), "/") parts := strings.Split(filepath.ToSlash(relPath), "/")
// Retain trailing slash for directory patterns. // Retain trailing slash for directory patterns.
@ -156,19 +81,19 @@ func (r *Repository) Ignore(relPath string) bool {
suffix := path.Clean(strings.Join(parts[i:], "/")) + trailingSlash suffix := path.Clean(strings.Join(parts[i:], "/")) + trailingSlash
// For this prefix (e.g. ".", or "dir1/dir2") we check if the // For this prefix (e.g. ".", or "dir1/dir2") we check if the
// suffix is matched in the respective ignore files. // suffix is matched in the respective gitignore files.
fs, ok := r.ignore[prefix] for _, rules := range r.getIgnoreRules(prefix) {
if !ok { match, err := rules.MatchesPath(suffix)
continue if err != nil {
} return false, err
for _, f := range fs { }
if f.MatchesPath(suffix) { if match {
return true return true, nil
} }
} }
} }
return false return false, nil
} }
func NewRepository(path string) (*Repository, error) { func NewRepository(path string) (*Repository, error) {
@ -192,16 +117,22 @@ func NewRepository(path string) (*Repository, error) {
repo := &Repository{ repo := &Repository{
real: real, real: real,
rootPath: rootPath, rootPath: rootPath,
ignore: make(map[string][]*ignore.GitIgnore), ignore: make(map[string][]ignoreRules),
} }
// Always ignore ".git" directory. // Initialize root ignore rules.
repo.ignore["."] = append(repo.ignore["."], ignore.CompileIgnoreLines(".git")) // These are special and not lazily initialized because:
// 1) we include a hardcoded ignore pattern
// Load repository-wide excludes file. // 2) we include a gitignore file at a non-standard path
err = repo.includeIgnoreFile(filepath.Join(".git", "info", "excludes"), ".") repo.ignore["."] = []ignoreRules{
if err != nil { // Always ignore root .git directory.
return nil, err newStringIgnoreRules([]string{
".git",
}),
// Load repository-wide excludes file.
repo.newIgnoreFile(".git/info/excludes"),
// Load root gitignore file.
repo.newIgnoreFile(".gitignore"),
} }
return repo, nil return repo, nil

View File

@ -8,21 +8,30 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
// Wrap a Repository and expose a panicking version of [Repository.Ignore].
type testRepository struct {
t *testing.T
r *Repository
}
func (r *testRepository) Ignore(relPath string) bool {
ign, err := r.r.Ignore(relPath)
require.NoError(r.t, err)
return ign
}
func TestRepository(t *testing.T) { func TestRepository(t *testing.T) {
// Load this repository as test. // Load this repository as test.
repo, err := NewRepository("..") repo, err := NewRepository("..")
require.NoError(t, err) tr := testRepository{t, repo}
// Load all .gitignore files in this repository.
err = repo.includeIgnoreFilesForPath(".")
require.NoError(t, err) require.NoError(t, err)
// Check that top level ignores work. // Check that top level ignores work.
assert.True(t, repo.Ignore(".DS_Store")) assert.True(t, tr.Ignore(".DS_Store"))
assert.True(t, repo.Ignore("foo.pyc")) assert.True(t, tr.Ignore("foo.pyc"))
assert.False(t, repo.Ignore("vendor")) assert.False(t, tr.Ignore("vendor"))
assert.True(t, repo.Ignore("vendor/")) assert.True(t, tr.Ignore("vendor/"))
// Check that ignores under testdata work. // Check that ignores under testdata work.
assert.True(t, repo.Ignore(filepath.Join("git", "testdata", "root.ignoreme"))) assert.True(t, tr.Ignore(filepath.Join("git", "testdata", "root.ignoreme")))
} }

View File

@ -28,7 +28,7 @@ type View struct {
// Ignore computes whether to ignore the specified path. // Ignore computes whether to ignore the specified path.
// The specified path is relative to the view's target path. // The specified path is relative to the view's target path.
func (v *View) Ignore(path string) bool { func (v *View) Ignore(path string) (bool, error) {
path = filepath.ToSlash(path) path = filepath.ToSlash(path)
// Retain trailing slash for directory patterns. // Retain trailing slash for directory patterns.
@ -45,7 +45,7 @@ func (v *View) Ignore(path string) bool {
// apply to the specified file path. // apply to the specified file path.
// //
// This function is provided to implement [fileset.Ignorer]. // This function is provided to implement [fileset.Ignorer].
func (v *View) IgnoreFile(file string) bool { func (v *View) IgnoreFile(file string) (bool, error) {
return v.Ignore(file) return v.Ignore(file)
} }
@ -58,8 +58,15 @@ func (v *View) IgnoreFile(file string) bool {
// with a trailing slash. // with a trailing slash.
// //
// This function is provided to implement [fileset.Ignorer]. // This function is provided to implement [fileset.Ignorer].
func (v *View) IgnoreDirectory(dir string) bool { func (v *View) IgnoreDirectory(dir string) (bool, error) {
return v.Ignore(dir) || v.Ignore(dir+"/") ign, err := v.Ignore(dir)
if err != nil {
return false, err
}
if ign {
return ign, nil
}
return v.Ignore(dir + "/")
} }
func NewView(path string) (*View, error) { func NewView(path string) (*View, error) {
@ -79,12 +86,6 @@ func NewView(path string) (*View, error) {
return nil, err return nil, err
} }
// Load ignore files relevant for this view's path.
err = repo.includeIgnoreFilesForPath(targetPath)
if err != nil {
return nil, err
}
return &View{ return &View{
repo: repo, repo: repo,
targetPath: targetPath, targetPath: targetPath,

View File

@ -50,137 +50,151 @@ func createFakeRepo(t *testing.T) string {
return absPath return absPath
} }
func testViewAtRoot(t *testing.T, v *View) { // Wrap a View and expose a panicking version of [View.Ignore].
type testView struct {
t *testing.T
v *View
}
func (v *testView) Ignore(relPath string) bool {
ign, err := v.v.Ignore(relPath)
require.NoError(v.t, err)
return ign
}
func testViewAtRoot(t *testing.T, tv testView) {
// Check .gitignore at root. // Check .gitignore at root.
assert.True(t, v.Ignore("root.sh")) assert.True(t, tv.Ignore("root.sh"))
assert.True(t, v.Ignore("root/foo")) assert.True(t, tv.Ignore("root/foo"))
assert.True(t, v.Ignore("root_double")) assert.True(t, tv.Ignore("root_double"))
assert.False(t, v.Ignore("newfile")) assert.False(t, tv.Ignore("newfile"))
assert.True(t, v.Ignore("ignoredirectory/")) assert.True(t, tv.Ignore("ignoredirectory/"))
// Nested .gitignores should not affect root. // Nested .gitignores should not affect root.
assert.False(t, v.Ignore("a.sh")) assert.False(t, tv.Ignore("a.sh"))
// Nested .gitignores should apply in their path. // Nested .gitignores should apply in their path.
assert.True(t, v.Ignore("a/a.sh")) assert.True(t, tv.Ignore("a/a.sh"))
assert.True(t, v.Ignore("a/whatever/a.sh")) assert.True(t, tv.Ignore("a/whatever/a.sh"))
// .git must always be ignored. // .git must always be ignored.
assert.True(t, v.Ignore(".git")) assert.True(t, tv.Ignore(".git"))
} }
func TestViewRootInBricksRepo(t *testing.T) { func TestViewRootInBricksRepo(t *testing.T) {
v, err := NewView("./testdata") v, err := NewView("./testdata")
require.NoError(t, err) require.NoError(t, err)
testViewAtRoot(t, v) testViewAtRoot(t, testView{t, v})
} }
func TestViewRootInTempRepo(t *testing.T) { func TestViewRootInTempRepo(t *testing.T) {
v, err := NewView(createFakeRepo(t)) v, err := NewView(createFakeRepo(t))
require.NoError(t, err) require.NoError(t, err)
testViewAtRoot(t, v) testViewAtRoot(t, testView{t, v})
} }
func TestViewRootInTempDir(t *testing.T) { func TestViewRootInTempDir(t *testing.T) {
v, err := NewView(copyTestdata(t)) v, err := NewView(copyTestdata(t))
require.NoError(t, err) require.NoError(t, err)
testViewAtRoot(t, v) testViewAtRoot(t, testView{t, v})
} }
func testViewAtA(t *testing.T, v *View) { func testViewAtA(t *testing.T, tv testView) {
// Inherit .gitignore from root. // Inherit .gitignore from root.
assert.True(t, v.Ignore("root.sh")) assert.True(t, tv.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo")) assert.False(t, tv.Ignore("root/foo"))
assert.True(t, v.Ignore("root_double")) assert.True(t, tv.Ignore("root_double"))
assert.True(t, v.Ignore("ignoredirectory/")) assert.True(t, tv.Ignore("ignoredirectory/"))
// Check current .gitignore // Check current .gitignore
assert.True(t, v.Ignore("a.sh")) assert.True(t, tv.Ignore("a.sh"))
assert.True(t, v.Ignore("a_double")) assert.True(t, tv.Ignore("a_double"))
assert.False(t, v.Ignore("newfile")) assert.False(t, tv.Ignore("newfile"))
// Nested .gitignores should apply in their path. // Nested .gitignores should apply in their path.
assert.True(t, v.Ignore("b/b.sh")) assert.True(t, tv.Ignore("b/b.sh"))
assert.True(t, v.Ignore("b/whatever/b.sh")) assert.True(t, tv.Ignore("b/whatever/b.sh"))
} }
func TestViewAInBricksRepo(t *testing.T) { func TestViewAInBricksRepo(t *testing.T) {
v, err := NewView("./testdata/a") v, err := NewView("./testdata/a")
require.NoError(t, err) require.NoError(t, err)
testViewAtA(t, v) testViewAtA(t, testView{t, v})
} }
func TestViewAInTempRepo(t *testing.T) { func TestViewAInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a")) v, err := NewView(filepath.Join(createFakeRepo(t), "a"))
require.NoError(t, err) require.NoError(t, err)
testViewAtA(t, v) testViewAtA(t, testView{t, v})
} }
func TestViewAInTempDir(t *testing.T) { func TestViewAInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree. // Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a")) v, err := NewView(filepath.Join(copyTestdata(t), "a"))
require.NoError(t, err) require.NoError(t, err)
tv := testView{t, v}
// Check that this doesn't inherit .gitignore from root. // Check that this doesn't inherit .gitignore from root.
assert.False(t, v.Ignore("root.sh")) assert.False(t, tv.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo")) assert.False(t, tv.Ignore("root/foo"))
assert.False(t, v.Ignore("root_double")) assert.False(t, tv.Ignore("root_double"))
// Check current .gitignore // Check current .gitignore
assert.True(t, v.Ignore("a.sh")) assert.True(t, tv.Ignore("a.sh"))
assert.True(t, v.Ignore("a_double")) assert.True(t, tv.Ignore("a_double"))
assert.False(t, v.Ignore("newfile")) assert.False(t, tv.Ignore("newfile"))
// Nested .gitignores should apply in their path. // Nested .gitignores should apply in their path.
assert.True(t, v.Ignore("b/b.sh")) assert.True(t, tv.Ignore("b/b.sh"))
assert.True(t, v.Ignore("b/whatever/b.sh")) assert.True(t, tv.Ignore("b/whatever/b.sh"))
} }
func testViewAtAB(t *testing.T, v *View) { func testViewAtAB(t *testing.T, tv testView) {
// Inherit .gitignore from root. // Inherit .gitignore from root.
assert.True(t, v.Ignore("root.sh")) assert.True(t, tv.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo")) assert.False(t, tv.Ignore("root/foo"))
assert.True(t, v.Ignore("root_double")) assert.True(t, tv.Ignore("root_double"))
assert.True(t, v.Ignore("ignoredirectory/")) assert.True(t, tv.Ignore("ignoredirectory/"))
// Inherit .gitignore from root/a. // Inherit .gitignore from root/a.
assert.True(t, v.Ignore("a.sh")) assert.True(t, tv.Ignore("a.sh"))
assert.True(t, v.Ignore("a_double")) assert.True(t, tv.Ignore("a_double"))
// Check current .gitignore // Check current .gitignore
assert.True(t, v.Ignore("b.sh")) assert.True(t, tv.Ignore("b.sh"))
assert.True(t, v.Ignore("b_double")) assert.True(t, tv.Ignore("b_double"))
assert.False(t, v.Ignore("newfile")) assert.False(t, tv.Ignore("newfile"))
} }
func TestViewABInBricksRepo(t *testing.T) { func TestViewABInBricksRepo(t *testing.T) {
v, err := NewView("./testdata/a/b") v, err := NewView("./testdata/a/b")
require.NoError(t, err) require.NoError(t, err)
testViewAtAB(t, v) testViewAtAB(t, testView{t, v})
} }
func TestViewABInTempRepo(t *testing.T) { func TestViewABInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a", "b")) v, err := NewView(filepath.Join(createFakeRepo(t), "a", "b"))
require.NoError(t, err) require.NoError(t, err)
testViewAtAB(t, v) testViewAtAB(t, testView{t, v})
} }
func TestViewABInTempDir(t *testing.T) { func TestViewABInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree. // Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a", "b")) v, err := NewView(filepath.Join(copyTestdata(t), "a", "b"))
tv := testView{t, v}
require.NoError(t, err) require.NoError(t, err)
// Check that this doesn't inherit .gitignore from root. // Check that this doesn't inherit .gitignore from root.
assert.False(t, v.Ignore("root.sh")) assert.False(t, tv.Ignore("root.sh"))
assert.False(t, v.Ignore("root/foo")) assert.False(t, tv.Ignore("root/foo"))
assert.False(t, v.Ignore("root_double")) assert.False(t, tv.Ignore("root_double"))
// Check that this doesn't inherit .gitignore from root/a. // Check that this doesn't inherit .gitignore from root/a.
assert.False(t, v.Ignore("a.sh")) assert.False(t, tv.Ignore("a.sh"))
assert.False(t, v.Ignore("a_double")) assert.False(t, tv.Ignore("a_double"))
// Check current .gitignore // Check current .gitignore
assert.True(t, v.Ignore("b.sh")) assert.True(t, tv.Ignore("b.sh"))
assert.True(t, v.Ignore("b_double")) assert.True(t, tv.Ignore("b_double"))
assert.False(t, v.Ignore("newfile")) assert.False(t, tv.Ignore("newfile"))
} }

View File

@ -1,6 +1,7 @@
package fileset package fileset
import ( import (
"fmt"
"io/fs" "io/fs"
"path/filepath" "path/filepath"
) )
@ -55,13 +56,21 @@ func (w *FileSet) RecursiveListFiles(dir string) (fileList []File, err error) {
} }
if d.IsDir() { if d.IsDir() {
if w.ignore.IgnoreDirectory(relPath) { ign, err := w.ignore.IgnoreDirectory(relPath)
if err != nil {
return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err)
}
if ign {
return filepath.SkipDir return filepath.SkipDir
} }
return nil return nil
} }
if w.ignore.IgnoreFile(relPath) { ign, err := w.ignore.IgnoreFile(relPath)
if err != nil {
return fmt.Errorf("cannot check if %s should be ignored: %w", relPath, err)
}
if ign {
return nil return nil
} }

View File

@ -3,17 +3,17 @@ package fileset
// Ignorer is the interface for what determines if a path // Ignorer is the interface for what determines if a path
// in the [FileSet] must be ignored or not. // in the [FileSet] must be ignored or not.
type Ignorer interface { type Ignorer interface {
IgnoreFile(path string) bool IgnoreFile(path string) (bool, error)
IgnoreDirectory(path string) bool IgnoreDirectory(path string) (bool, error)
} }
// nopIgnorer implements an [Ignorer] that doesn't ignore anything. // nopIgnorer implements an [Ignorer] that doesn't ignore anything.
type nopIgnorer struct{} type nopIgnorer struct{}
func (nopIgnorer) IgnoreFile(path string) bool { func (nopIgnorer) IgnoreFile(path string) (bool, error) {
return false return false, nil
} }
func (nopIgnorer) IgnoreDirectory(path string) bool { func (nopIgnorer) IgnoreDirectory(path string) (bool, error) {
return false return false, nil
} }

View File

@ -8,7 +8,6 @@ import (
"time" "time"
"github.com/databricks/bricks/git" "github.com/databricks/bricks/git"
"github.com/databricks/bricks/libs/fileset"
"github.com/databricks/bricks/libs/sync/repofiles" "github.com/databricks/bricks/libs/sync/repofiles"
"github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go"
) )
@ -31,7 +30,7 @@ type SyncOptions struct {
type Sync struct { type Sync struct {
*SyncOptions *SyncOptions
fileSet *fileset.FileSet fileSet *git.FileSet
snapshot *Snapshot snapshot *Snapshot
repoFiles *repofiles.RepoFiles repoFiles *repofiles.RepoFiles
} }
@ -42,7 +41,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = git.EnsureValidGitIgnoreExists(fileSet) err = fileSet.EnsureValidGitIgnoreExists()
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -8,7 +8,6 @@ import (
"sync" "sync"
"github.com/databricks/bricks/git" "github.com/databricks/bricks/git"
"github.com/databricks/bricks/libs/fileset"
"github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/commands" "github.com/databricks/databricks-sdk-go/service/commands"
"github.com/databricks/databricks-sdk-go/service/scim" "github.com/databricks/databricks-sdk-go/service/scim"
@ -27,7 +26,7 @@ type project struct {
environment *Environment environment *Environment
wsc *databricks.WorkspaceClient wsc *databricks.WorkspaceClient
me *scim.User me *scim.User
fileSet *fileset.FileSet fileSet *git.FileSet
} }
// Configure is used as a PreRunE function for all commands that // Configure is used as a PreRunE function for all commands that
@ -70,7 +69,7 @@ func Initialize(ctx context.Context, root, env string) (context.Context, error)
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = git.EnsureValidGitIgnoreExists(fileSet) err = fileSet.EnsureValidGitIgnoreExists()
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -120,7 +119,7 @@ func (p *project) Root() string {
return p.root return p.root
} }
func (p *project) GetFileSet() *fileset.FileSet { func (p *project) GetFileSet() *git.FileSet {
return p.fileSet return p.fileSet
} }
@ -133,7 +132,11 @@ func (p *project) GetFileSet() *fileset.FileSet {
// accidentally check into git // accidentally check into git
func (p *project) CacheDir() (string, error) { func (p *project) CacheDir() (string, error) {
// assert cache dir is present in git ignore // assert cache dir is present in git ignore
if !p.fileSet.Ignorer().IgnoreDirectory(fmt.Sprintf("/%s/", CacheDirName)) { ign, err := p.fileSet.IgnoreDirectory(fmt.Sprintf("/%s/", CacheDirName))
if err != nil {
return "", fmt.Errorf("failed to check if directory %s is ignored: %w", CacheDirName, err)
}
if !ign {
return "", fmt.Errorf("please add /%s/ to .gitignore", CacheDirName) return "", fmt.Errorf("please add /%s/ to .gitignore", CacheDirName)
} }

View File

@ -6,6 +6,7 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"testing" "testing"
"time"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -57,6 +58,14 @@ func TestProjectInitializationAddsCacheDirToGitIgnore(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Contains(t, string(fileBytes), ".databricks") assert.Contains(t, string(fileBytes), ".databricks")
// Muck with mtime of this file manually because in GitHub Actions runners the
// mtime isn't updated on write automatically (probably to save I/Os).
// We perform a reload of .gitignore files only if their mtime has changed.
// Add a minute to ensure it is different if the value is truncated to full seconds.
future := time.Now().Add(time.Minute)
err = os.Chtimes(gitIgnorePath, future, future)
require.NoError(t, err)
prj := Get(ctx) prj := Get(ctx)
_, err = prj.CacheDir() _, err = prj.CacheDir()
assert.NoError(t, err) assert.NoError(t, err)