Hardcode `.databricks` ignore pattern to ensure we never sync the cache directory (#295)

## Changes
<!-- Summary of your changes that are easy to understand -->
1. Add pattern to always ignore .databricks
2. Best effort creation of .gitignore with .databricks if it's needed

## Tests
<!-- How is this tested? -->
This commit is contained in:
shreyas-goenka 2023-04-04 15:44:57 +02:00 committed by GitHub
parent 8de7d32ed1
commit 902813a490
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 160 additions and 41 deletions

View File

@ -1,9 +1,6 @@
package git
import (
"os"
"path/filepath"
"github.com/databricks/bricks/libs/fileset"
)
@ -51,29 +48,6 @@ func (f *FileSet) RecursiveListFiles(dir string) ([]fileset.File, error) {
return f.fileset.RecursiveListFiles(dir)
}
// Only call this function for a bricks project root
// since it will create a .gitignore file if missing
func (f *FileSet) EnsureValidGitIgnoreExists() error {
ign, err := f.view.IgnoreDirectory(".databricks")
if err != nil {
return err
}
if ign {
return nil
}
gitIgnorePath := filepath.Join(f.fileset.Root(), ".gitignore")
file, err := os.OpenFile(gitIgnorePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return err
}
defer file.Close()
_, err = file.WriteString("\n.databricks\n")
if err != nil {
return err
}
f.view.repo.taintIgnoreRules()
return nil
return f.view.EnsureValidGitIgnoreExists()
}

View File

@ -169,3 +169,7 @@ func NewRepository(path string) (*Repository, error) {
return repo, nil
}
func (r *Repository) addIgnoreRule(rule ignoreRules) {
r.ignore["."] = append(r.ignore["."], rule)
}

View File

@ -0,0 +1 @@
.databricks

View File

@ -0,0 +1 @@
foo.py

View File

@ -1,6 +1,7 @@
package git
import (
"os"
"path/filepath"
"strings"
)
@ -91,3 +92,37 @@ func NewView(path string) (*View, error) {
targetPath: targetPath,
}, nil
}
func (v *View) EnsureValidGitIgnoreExists() error {
ign, err := v.IgnoreDirectory(".databricks")
if err != nil {
return err
}
// return early if .databricks is already being ignored
if ign {
return nil
}
// Create .gitignore with .databricks entry
gitIgnorePath := filepath.Join(v.repo.Root(), v.targetPath, ".gitignore")
file, err := os.OpenFile(gitIgnorePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return err
}
defer file.Close()
// Hard code .databricks ignore pattern so that we never sync it (irrespective)
// of .gitignore patterns
v.repo.addIgnoreRule(newStringIgnoreRules([]string{
".databricks",
}))
_, err = file.WriteString("\n.databricks\n")
if err != nil {
return err
}
v.repo.taintIgnoreRules()
return nil
}

View File

@ -11,11 +11,11 @@ import (
"github.com/stretchr/testify/require"
)
func copyTestdata(t *testing.T) string {
func copyTestdata(t *testing.T, name string) string {
tempDir := t.TempDir()
// Copy everything under "testdata" to temporary directory.
err := filepath.WalkDir("testdata", func(path string, d fs.DirEntry, err error) error {
// Copy everything under testdata ${name} to temporary directory.
err := filepath.WalkDir(name, func(path string, d fs.DirEntry, err error) error {
require.NoError(t, err)
if d.IsDir() {
@ -38,11 +38,11 @@ func copyTestdata(t *testing.T) string {
})
require.NoError(t, err)
return filepath.Join(tempDir, "testdata")
return filepath.Join(tempDir, name)
}
func createFakeRepo(t *testing.T) string {
absPath := copyTestdata(t)
func createFakeRepo(t *testing.T, testdataName string) string {
absPath := copyTestdata(t, testdataName)
// Add .git directory to make it look like a Git repository.
err := os.Mkdir(filepath.Join(absPath, ".git"), 0755)
@ -88,13 +88,13 @@ func TestViewRootInBricksRepo(t *testing.T) {
}
func TestViewRootInTempRepo(t *testing.T) {
v, err := NewView(createFakeRepo(t))
v, err := NewView(createFakeRepo(t, "testdata"))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
func TestViewRootInTempDir(t *testing.T) {
v, err := NewView(copyTestdata(t))
v, err := NewView(copyTestdata(t, "testdata"))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
@ -123,14 +123,14 @@ func TestViewAInBricksRepo(t *testing.T) {
}
func TestViewAInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a"))
v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a"))
require.NoError(t, err)
testViewAtA(t, testView{t, v})
}
func TestViewAInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a"))
v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a"))
require.NoError(t, err)
tv := testView{t, v}
@ -173,14 +173,14 @@ func TestViewABInBricksRepo(t *testing.T) {
}
func TestViewABInTempRepo(t *testing.T) {
v, err := NewView(filepath.Join(createFakeRepo(t), "a", "b"))
v, err := NewView(filepath.Join(createFakeRepo(t, "testdata"), "a", "b"))
require.NoError(t, err)
testViewAtAB(t, testView{t, v})
}
func TestViewABInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(filepath.Join(copyTestdata(t), "a", "b"))
v, err := NewView(filepath.Join(copyTestdata(t, "testdata"), "a", "b"))
tv := testView{t, v}
require.NoError(t, err)
@ -198,3 +198,107 @@ func TestViewABInTempDir(t *testing.T) {
assert.True(t, tv.Ignore("b_double"))
assert.False(t, tv.Ignore("newfile"))
}
func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T) {
expected, err := os.ReadFile("./testdata_view_ignore/.gitignore")
require.NoError(t, err)
repoPath := createFakeRepo(t, "testdata_view_ignore")
// Since root .gitignore already has .databricks, there should be no edits
// to root .gitignore
v, err := NewView(repoPath)
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
require.NoError(t, err)
actual, err := os.ReadFile(filepath.Join(repoPath, ".gitignore"))
require.NoError(t, err)
assert.Equal(t, string(expected), string(actual))
}
func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T) {
expected, err := os.ReadFile("./testdata_view_ignore/a/.gitignore")
require.NoError(t, err)
repoPath := createFakeRepo(t, "testdata_view_ignore")
// Since root .gitignore already has .databricks, there should be no edits
// to a/.gitignore
v, err := NewView(filepath.Join(repoPath, "a"))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
require.NoError(t, err)
actual, err := os.ReadFile(filepath.Join(repoPath, v.targetPath, ".gitignore"))
require.NoError(t, err)
assert.Equal(t, string(expected), string(actual))
}
func TestViewAddsGitignoreWithCacheDir(t *testing.T) {
repoPath := createFakeRepo(t, "testdata")
err := os.Remove(filepath.Join(repoPath, ".gitignore"))
assert.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to root .gitignore
v, err := NewView(repoPath)
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
require.NoError(t, err)
actual, err := os.ReadFile(filepath.Join(repoPath, ".gitignore"))
require.NoError(t, err)
assert.Contains(t, string(actual), "\n.databricks\n")
}
func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) {
repoPath := createFakeRepo(t, "testdata")
err := os.Remove(filepath.Join(repoPath, ".gitignore"))
require.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to a/.gitignore
v, err := NewView(filepath.Join(repoPath, "a"))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
require.NoError(t, err)
actual, err := os.ReadFile(filepath.Join(repoPath, v.targetPath, ".gitignore"))
require.NoError(t, err)
// created .gitignore has cache dir listed
assert.Contains(t, string(actual), "\n.databricks\n")
assert.NoFileExists(t, filepath.Join(repoPath, ".gitignore"))
}
func TestViewAlwaysIgnoresCacheDir(t *testing.T) {
repoPath := createFakeRepo(t, "testdata")
v, err := NewView(repoPath)
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
require.NoError(t, err)
// Delete root .gitignore which contains .databricks entry
err = os.Remove(filepath.Join(repoPath, ".gitignore"))
require.NoError(t, err)
// taint rules to reload .gitignore
v.repo.taintIgnoreRules()
// assert .databricks is still being ignored
ign1, err := v.IgnoreDirectory(".databricks")
require.NoError(t, err)
assert.True(t, ign1)
ign2, err := v.IgnoreDirectory("a/.databricks")
require.NoError(t, err)
assert.True(t, ign2)
}

View File

@ -80,7 +80,7 @@ func TestProjectInitializationDoesNotAddCacheDirToGitIgnoreIfAlreadyPresent(t *t
gitIgnorePath := filepath.Join(projectDir, ".gitignore")
err = os.WriteFile(gitIgnorePath, []byte("/.databricks/"), 0o644)
err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644)
assert.NoError(t, err)
_, err = Initialize(context.Background(), projectDir, DefaultEnvironment)

View File

@ -1,2 +1,2 @@
/.databricks/
.databricks