From a26461c5a3968591f998643a583c52772c11df4f Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Wed, 5 Mar 2025 11:57:05 +0100 Subject: [PATCH 1/4] Do not modify/create .gitignore in bundle root (#2429) ## Changes - Do not modify or edit .gitignore in bundle root. - Instead create .databricks/.gitignore with content set to "*" ## Why Merging our changes into existing .gitignore is complicated and adding .gitignore where it's not expected adds to the noise. Other tools also use the approach in this PR (e.g. ruff creates .ruff_cache/.gitignore). ## Tests - Modified templates/default-sql to capture this new file. --- acceptance/bundle/templates/dbt-sql/script | 1 + .../templates/default-python/classic/script | 1 + .../default-python/serverless/script | 1 + .../my_default_sql/.databricks/out.gitignore | 1 + .../bundle/templates/default-sql/script | 3 + .../experimental-jobs-as-code/script | 1 + bundle/bundle.go | 2 + integration/cmd/sync/sync_test.go | 75 ++++++++-------- libs/git/fileset.go | 4 - libs/git/fileset_test.go | 33 ------- libs/git/view.go | 33 ++----- libs/git/view_test.go | 88 ------------------- libs/sync/gitignore.go | 26 ++++++ libs/sync/sync.go | 5 +- libs/sync/sync_test.go | 17 +--- 15 files changed, 82 insertions(+), 209 deletions(-) create mode 100644 acceptance/bundle/templates/default-sql/output/my_default_sql/.databricks/out.gitignore create mode 100644 libs/sync/gitignore.go diff --git a/acceptance/bundle/templates/dbt-sql/script b/acceptance/bundle/templates/dbt-sql/script index 3a2660de5..427f655a6 100644 --- a/acceptance/bundle/templates/dbt-sql/script +++ b/acceptance/bundle/templates/dbt-sql/script @@ -6,3 +6,4 @@ trace $CLI bundle validate -t prod # Do not affect this repository's git behaviour #2318 mv .gitignore out.gitignore +rm .databricks/.gitignore diff --git a/acceptance/bundle/templates/default-python/classic/script b/acceptance/bundle/templates/default-python/classic/script index 7e5524065..955e05925 100644 --- a/acceptance/bundle/templates/default-python/classic/script +++ b/acceptance/bundle/templates/default-python/classic/script @@ -6,6 +6,7 @@ trace $CLI bundle validate -t prod # Do not affect this repository's git behaviour #2318 mv .gitignore out.gitignore +rm .databricks/.gitignore cd ../../ diff --git a/acceptance/bundle/templates/default-python/serverless/script b/acceptance/bundle/templates/default-python/serverless/script index e5fcb7741..d7b047fec 100644 --- a/acceptance/bundle/templates/default-python/serverless/script +++ b/acceptance/bundle/templates/default-python/serverless/script @@ -6,3 +6,4 @@ trace $CLI bundle validate -t prod # Do not affect this repository's git behaviour #2318 mv .gitignore out.gitignore +rm .databricks/.gitignore diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/.databricks/out.gitignore b/acceptance/bundle/templates/default-sql/output/my_default_sql/.databricks/out.gitignore new file mode 100644 index 000000000..72e8ffc0d --- /dev/null +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/.databricks/out.gitignore @@ -0,0 +1 @@ +* diff --git a/acceptance/bundle/templates/default-sql/script b/acceptance/bundle/templates/default-sql/script index 7ea0d863c..01b393a8c 100644 --- a/acceptance/bundle/templates/default-sql/script +++ b/acceptance/bundle/templates/default-sql/script @@ -6,3 +6,6 @@ trace $CLI bundle validate -t prod # Do not affect this repository's git behaviour #2318 mv .gitignore out.gitignore + +# Only for this test (default-sql), record 
.databricks/.gitignore in the output +mv .databricks/.gitignore .databricks/out.gitignore diff --git a/acceptance/bundle/templates/experimental-jobs-as-code/script b/acceptance/bundle/templates/experimental-jobs-as-code/script index 08e48fc5f..ed9a07dd5 100644 --- a/acceptance/bundle/templates/experimental-jobs-as-code/script +++ b/acceptance/bundle/templates/experimental-jobs-as-code/script @@ -11,3 +11,4 @@ rm -fr .venv resources/__pycache__ uv.lock my_jobs_as_code.egg-info # Do not affect this repository's git behaviour #2318 mv .gitignore out.gitignore +rm .databricks/.gitignore diff --git a/bundle/bundle.go b/bundle/bundle.go index 9cb8916f5..dad99d6e1 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -21,6 +21,7 @@ import ( "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/locker" "github.com/databricks/cli/libs/log" + libsync "github.com/databricks/cli/libs/sync" "github.com/databricks/cli/libs/tags" "github.com/databricks/cli/libs/terraform" "github.com/databricks/cli/libs/vfs" @@ -198,6 +199,7 @@ func (b *Bundle) CacheDir(ctx context.Context, paths ...string) (string, error) return "", err } + libsync.WriteGitIgnore(ctx, b.BundleRootPath) return dir, nil } diff --git a/integration/cmd/sync/sync_test.go b/integration/cmd/sync/sync_test.go index 88e6ed89a..337db8fca 100644 --- a/integration/cmd/sync/sync_test.go +++ b/integration/cmd/sync/sync_test.go @@ -224,22 +224,21 @@ func (a *syncTest) snapshotContains(files []string) { _, ok := s.LastModifiedTimes[filePath] assert.True(a.t, ok, "%s not in snapshot file: %v", filePath, s.LastModifiedTimes) } - assert.Equal(a.t, len(files), len(s.LastModifiedTimes)) + assert.Equal(a.t, len(files), len(s.LastModifiedTimes), "files=%s s.LastModifiedTimes=%s", files, s.LastModifiedTimes) } func TestSyncFullFileSync(t *testing.T) { ctx, assertSync := setupSyncTest(t, "--full", "--watch") - // .gitignore is created by the sync process to enforce .databricks is not synced assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) // New file localFilePath := filepath.Join(assertSync.localRoot, "foo.txt") f := testfile.CreateFile(t, localFilePath) defer f.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.txt", ".gitignore")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.txt")) assertSync.remoteFileContent(ctx, "foo.txt", "") // Write to file @@ -255,24 +254,23 @@ func TestSyncFullFileSync(t *testing.T) { // delete f.Remove(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) } func TestSyncIncrementalFileSync(t *testing.T) { ctx, assertSync := setupSyncTest(t, "--watch") - // .gitignore is created by the sync process to enforce .databricks is not synced assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) // New file localFilePath := filepath.Join(assertSync.localRoot, "foo.txt") f := testfile.CreateFile(t, localFilePath) defer f.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.txt", ".gitignore")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.txt")) assertSync.remoteFileContent(ctx, "foo.txt", "") - assertSync.snapshotContains(append(repoFiles, "foo.txt", 
".gitignore")) + assertSync.snapshotContains(append(repoFiles, "foo.txt")) // Write to file f.Overwrite(t, `{"statement": "Mi Gente"}`) @@ -287,16 +285,15 @@ func TestSyncIncrementalFileSync(t *testing.T) { // delete f.Remove(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) - assertSync.snapshotContains(append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) + assertSync.snapshotContains(repoFiles) } func TestSyncNestedFolderSync(t *testing.T) { ctx, assertSync := setupSyncTest(t, "--watch") - // .gitignore is created by the sync process to enforce .databricks is not synced assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) // New file localFilePath := filepath.Join(assertSync.localRoot, "dir1/dir2/dir3/foo.txt") @@ -305,25 +302,24 @@ func TestSyncNestedFolderSync(t *testing.T) { f := testfile.CreateFile(t, localFilePath) defer f.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "dir1")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "dir1")) assertSync.remoteDirContent(ctx, "dir1", []string{"dir2"}) assertSync.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"}) assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/dir2/dir3/foo.txt")) + assertSync.snapshotContains(append(repoFiles, "dir1/dir2/dir3/foo.txt")) // delete f.Remove(t) assertSync.waitForCompletionMarker() assertSync.remoteNotExist(ctx, "dir1") - assertSync.snapshotContains(append(repoFiles, ".gitignore")) + assertSync.snapshotContains(repoFiles) } func TestSyncNestedFolderDoesntFailOnNonEmptyDirectory(t *testing.T) { ctx, assertSync := setupSyncTest(t, "--watch") - // .gitignore is created by the sync process to enforce .databricks is not synced assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) // New file localFilePath := filepath.Join(assertSync.localRoot, "dir1/dir2/dir3/foo.txt") @@ -353,9 +349,8 @@ func TestSyncNestedFolderDoesntFailOnNonEmptyDirectory(t *testing.T) { func TestSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) { ctx, assertSync := setupSyncTest(t, "--watch") - // .gitignore is created by the sync process to enforce .databricks is not synced assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) // New file localFilePath := filepath.Join(assertSync.localRoot, "dir1/a b+c/c+d e/e+f g#i.txt") @@ -364,17 +359,17 @@ func TestSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) { f := testfile.CreateFile(t, localFilePath) defer f.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "dir1")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "dir1")) assertSync.remoteDirContent(ctx, "dir1", []string{"a b+c"}) assertSync.remoteDirContent(ctx, "dir1/a b+c", []string{"c+d e"}) assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{"e+f g#i.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/a b+c/c+d e/e+f g#i.txt")) + assertSync.snapshotContains(append(repoFiles, "dir1/a b+c/c+d e/e+f g#i.txt")) // delete f.Remove(t) 
assertSync.waitForCompletionMarker() assertSync.remoteNotExist(ctx, "dir1/a b+c/c+d e") - assertSync.snapshotContains(append(repoFiles, ".gitignore")) + assertSync.snapshotContains(repoFiles) } // This is a check for the edge case when a user does the following: @@ -395,23 +390,23 @@ func TestSyncIncrementalFileOverwritesFolder(t *testing.T) { f := testfile.CreateFile(t, localFilePath) defer f.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo")) assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"}) - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo/bar.txt")) + assertSync.snapshotContains(append(repoFiles, "foo/bar.txt")) // delete foo/bar.txt f.Remove(t) os.Remove(filepath.Join(assertSync.localRoot, "foo")) assertSync.waitForCompletionMarker() assertSync.remoteNotExist(ctx, "foo") - assertSync.snapshotContains(append(repoFiles, ".gitignore")) + assertSync.snapshotContains(repoFiles) f2 := testfile.CreateFile(t, filepath.Join(assertSync.localRoot, "foo")) defer f2.Close(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo")) assertSync.objectType(ctx, "foo", "FILE") - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo")) + assertSync.snapshotContains(append(repoFiles, "foo")) } func TestSyncIncrementalSyncPythonNotebookToFile(t *testing.T) { @@ -425,23 +420,23 @@ func TestSyncIncrementalSyncPythonNotebookToFile(t *testing.T) { // notebook was uploaded properly assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo")) assertSync.objectType(ctx, "foo", "NOTEBOOK") assertSync.language(ctx, "foo", "PYTHON") - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo.py")) + assertSync.snapshotContains(append(repoFiles, "foo.py")) // convert to vanilla python file f.Overwrite(t, "# No longer a python notebook") assertSync.waitForCompletionMarker() assertSync.objectType(ctx, "foo.py", "FILE") - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo.py")) - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo.py")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.py")) + assertSync.snapshotContains(append(repoFiles, "foo.py")) // delete the vanilla python file f.Remove(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) - assertSync.snapshotContains(append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) + assertSync.snapshotContains(repoFiles) } func TestSyncIncrementalSyncFileToPythonNotebook(t *testing.T) { @@ -454,17 +449,17 @@ func TestSyncIncrementalSyncFileToPythonNotebook(t *testing.T) { assertSync.waitForCompletionMarker() // assert file upload - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo.py")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo.py")) assertSync.objectType(ctx, "foo.py", "FILE") - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo.py")) + assertSync.snapshotContains(append(repoFiles, "foo.py")) // convert to notebook f.Overwrite(t, "# Databricks notebook source") assertSync.waitForCompletionMarker() assertSync.objectType(ctx, "foo", "NOTEBOOK") 
assertSync.language(ctx, "foo", "PYTHON") - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) - assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo.py")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo")) + assertSync.snapshotContains(append(repoFiles, "foo.py")) } func TestSyncIncrementalSyncPythonNotebookDelete(t *testing.T) { @@ -478,14 +473,14 @@ func TestSyncIncrementalSyncPythonNotebookDelete(t *testing.T) { assertSync.waitForCompletionMarker() // notebook was uploaded properly - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo")) + assertSync.remoteDirContent(ctx, "", append(repoFiles, "foo")) assertSync.objectType(ctx, "foo", "NOTEBOOK") assertSync.language(ctx, "foo", "PYTHON") // Delete notebook f.Remove(t) assertSync.waitForCompletionMarker() - assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + assertSync.remoteDirContent(ctx, "", repoFiles) } func TestSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) { diff --git a/libs/git/fileset.go b/libs/git/fileset.go index 8391548c9..7c0c372c9 100644 --- a/libs/git/fileset.go +++ b/libs/git/fileset.go @@ -43,7 +43,3 @@ func (f *FileSet) Files() ([]fileset.File, error) { f.view.repo.taintIgnoreRules() return f.fileset.Files() } - -func (f *FileSet) EnsureValidGitIgnoreExists() error { - return f.view.EnsureValidGitIgnoreExists() -} diff --git a/libs/git/fileset_test.go b/libs/git/fileset_test.go index 6d239edf5..cd85ee810 100644 --- a/libs/git/fileset_test.go +++ b/libs/git/fileset_test.go @@ -1,10 +1,8 @@ package git import ( - "os" "path" "path/filepath" - "strings" "testing" "github.com/databricks/cli/libs/vfs" @@ -51,34 +49,3 @@ func TestFileSetNonCleanRoot(t *testing.T) { require.NoError(t, err) assert.Len(t, files, 3) } - -func TestFileSetAddsCacheDirToGitIgnore(t *testing.T) { - projectDir := t.TempDir() - fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir)) - require.NoError(t, err) - err = fileSet.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - gitIgnorePath := filepath.Join(projectDir, ".gitignore") - assert.FileExists(t, gitIgnorePath) - fileBytes, err := os.ReadFile(gitIgnorePath) - assert.NoError(t, err) - assert.Contains(t, string(fileBytes), ".databricks") -} - -func TestFileSetDoesNotCacheDirToGitIgnoreIfAlreadyPresent(t *testing.T) { - projectDir := t.TempDir() - gitIgnorePath := filepath.Join(projectDir, ".gitignore") - - fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir)) - require.NoError(t, err) - err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644) - require.NoError(t, err) - - err = fileSet.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - b, err := os.ReadFile(gitIgnorePath) - require.NoError(t, err) - assert.Equal(t, 1, strings.Count(string(b), ".databricks")) -} diff --git a/libs/git/view.go b/libs/git/view.go index db22dfc5d..142cc4947 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -90,46 +90,25 @@ func NewView(worktreeRoot, root vfs.Path) (*View, error) { target = strings.TrimPrefix(target, string(os.PathSeparator)) target = path.Clean(filepath.ToSlash(target)) - return &View{ + result := &View{ repo: repo, targetPath: target, - }, nil + } + + result.SetupDefaults() + return result, nil } func NewViewAtRoot(root vfs.Path) (*View, error) { return NewView(root, root) } -func (v *View) EnsureValidGitIgnoreExists() error { - ign, err := v.IgnoreDirectory(".databricks") - if err != nil { - return err - } - - // return early if .databricks is already being 
ignored - if ign { - return nil - } - - // Create .gitignore with .databricks entry - gitIgnorePath := filepath.Join(v.repo.Root(), v.targetPath, ".gitignore") - file, err := os.OpenFile(gitIgnorePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o644) - if err != nil { - return err - } - defer file.Close() - +func (v *View) SetupDefaults() { // Hard code .databricks ignore pattern so that we never sync it (irrespective) // of .gitignore patterns v.repo.addIgnoreRule(newStringIgnoreRules([]string{ ".databricks", })) - _, err = file.WriteString("\n.databricks\n") - if err != nil { - return err - } - v.repo.taintIgnoreRules() - return nil } diff --git a/libs/git/view_test.go b/libs/git/view_test.go index 96881fdee..9d7a4cdec 100644 --- a/libs/git/view_test.go +++ b/libs/git/view_test.go @@ -209,100 +209,12 @@ func TestViewABInTempDir(t *testing.T) { assert.False(t, tv.Ignore("newfile")) } -func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T) { - expected, err := os.ReadFile("./testdata_view_ignore/.gitignore") - require.NoError(t, err) - - repoPath := createFakeRepo(t, "testdata_view_ignore") - - // Since root .gitignore already has .databricks, there should be no edits - // to root .gitignore - v, err := NewViewAtRoot(vfs.MustNew(repoPath)) - require.NoError(t, err) - - err = v.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - actual, err := os.ReadFile(filepath.Join(repoPath, ".gitignore")) - require.NoError(t, err) - - assert.Equal(t, string(expected), string(actual)) -} - -func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T) { - expected, err := os.ReadFile("./testdata_view_ignore/a/.gitignore") - require.NoError(t, err) - - repoPath := createFakeRepo(t, "testdata_view_ignore") - - // Since root .gitignore already has .databricks, there should be no edits - // to a/.gitignore - v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a"))) - require.NoError(t, err) - - err = v.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - actual, err := os.ReadFile(filepath.Join(repoPath, v.targetPath, ".gitignore")) - require.NoError(t, err) - - assert.Equal(t, string(expected), string(actual)) -} - -func TestViewAddsGitignoreWithCacheDir(t *testing.T) { - repoPath := createFakeRepo(t, "testdata") - err := os.Remove(filepath.Join(repoPath, ".gitignore")) - assert.NoError(t, err) - - // Since root .gitignore was deleted, new view adds .databricks to root .gitignore - v, err := NewViewAtRoot(vfs.MustNew(repoPath)) - require.NoError(t, err) - - err = v.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - actual, err := os.ReadFile(filepath.Join(repoPath, ".gitignore")) - require.NoError(t, err) - - assert.Contains(t, string(actual), "\n.databricks\n") -} - -func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) { - repoPath := createFakeRepo(t, "testdata") - err := os.Remove(filepath.Join(repoPath, ".gitignore")) - require.NoError(t, err) - - // Since root .gitignore was deleted, new view adds .databricks to a/.gitignore - v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a"))) - require.NoError(t, err) - - err = v.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - actual, err := os.ReadFile(filepath.Join(repoPath, v.targetPath, ".gitignore")) - require.NoError(t, err) - - // created .gitignore has cache dir listed - assert.Contains(t, string(actual), "\n.databricks\n") - assert.NoFileExists(t, filepath.Join(repoPath, ".gitignore")) -} - func 
TestViewAlwaysIgnoresCacheDir(t *testing.T) { repoPath := createFakeRepo(t, "testdata") v, err := NewViewAtRoot(vfs.MustNew(repoPath)) require.NoError(t, err) - err = v.EnsureValidGitIgnoreExists() - require.NoError(t, err) - - // Delete root .gitignore which contains .databricks entry - err = os.Remove(filepath.Join(repoPath, ".gitignore")) - require.NoError(t, err) - - // taint rules to reload .gitignore - v.repo.taintIgnoreRules() - // assert .databricks is still being ignored ign1, err := v.IgnoreDirectory(".databricks") require.NoError(t, err) diff --git a/libs/sync/gitignore.go b/libs/sync/gitignore.go new file mode 100644 index 000000000..b3888a0cc --- /dev/null +++ b/libs/sync/gitignore.go @@ -0,0 +1,26 @@ +package sync + +import ( + "context" + "os" + "path/filepath" + + "github.com/databricks/cli/libs/log" +) + +func WriteGitIgnore(ctx context.Context, dir string) { + gitignorePath := filepath.Join(dir, ".databricks", ".gitignore") + file, err := os.OpenFile(gitignorePath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o644) + if err != nil { + if os.IsExist(err) { + return + } + log.Debugf(ctx, "Failed to create %s: %s", gitignorePath, err) + } + + defer file.Close() + _, err = file.WriteString("*\n") + if err != nil { + log.Debugf(ctx, "Error writing to %s: %s", gitignorePath, err) + } +} diff --git a/libs/sync/sync.go b/libs/sync/sync.go index f13fa934a..4d14f745a 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -69,10 +69,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { return nil, err } - err = fileSet.EnsureValidGitIgnoreExists() - if err != nil { - return nil, err - } + WriteGitIgnore(ctx, opts.LocalRoot.Native()) includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include) if err != nil { diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index f30431770..1b5498275 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -40,9 +40,6 @@ func TestGetFileSet(t *testing.T) { fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) - err = fileSet.EnsureValidGitIgnoreExists() - require.NoError(t, err) - inc, err := fileset.NewGlobSet(root, []string{}) require.NoError(t, err) @@ -59,7 +56,7 @@ func TestGetFileSet(t *testing.T) { fileList, err := s.GetFileList(ctx) require.NoError(t, err) - require.Len(t, fileList, 10) + require.Len(t, fileList, 9) inc, err = fileset.NewGlobSet(root, []string{}) require.NoError(t, err) @@ -77,7 +74,7 @@ func TestGetFileSet(t *testing.T) { fileList, err = s.GetFileList(ctx) require.NoError(t, err) - require.Len(t, fileList, 2) + require.Len(t, fileList, 1) inc, err = fileset.NewGlobSet(root, []string{"./.databricks/*.go"}) require.NoError(t, err) @@ -95,7 +92,7 @@ func TestGetFileSet(t *testing.T) { fileList, err = s.GetFileList(ctx) require.NoError(t, err) - require.Len(t, fileList, 11) + require.Len(t, fileList, 10) } func TestRecursiveExclude(t *testing.T) { @@ -106,9 +103,6 @@ func TestRecursiveExclude(t *testing.T) { fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) - err = fileSet.EnsureValidGitIgnoreExists() - require.NoError(t, err) - inc, err := fileset.NewGlobSet(root, []string{}) require.NoError(t, err) @@ -125,7 +119,7 @@ func TestRecursiveExclude(t *testing.T) { fileList, err := s.GetFileList(ctx) require.NoError(t, err) - require.Len(t, fileList, 7) + require.Len(t, fileList, 6) } func TestNegateExclude(t *testing.T) { @@ -136,9 +130,6 @@ func TestNegateExclude(t *testing.T) { fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) - err = 
fileSet.EnsureValidGitIgnoreExists()
-	require.NoError(t, err)
-
 	inc, err := fileset.NewGlobSet(root, []string{})
 	require.NoError(t, err)

From 3a3076d9ea83242067bc9f50b1cbc3b8d2d99cdb Mon Sep 17 00:00:00 2001
From: Russell Clarey
Date: Wed, 5 Mar 2025 12:17:03 +0100
Subject: [PATCH 2/4] Output deprecated annotations in the JSON schema (#2422)

## Changes
Start outputting `deprecated` and `deprecationMessage` annotations in the JSON schema

## Why
So that deprecated fields are shown as deprecated in VSCode (and other editors)

## Tests
- manually tested

---
 .github/workflows/push.yml               |  5 ++++-
 bundle/config/root.go                    |  2 +-
 bundle/internal/annotation/descriptor.go |  1 +
 bundle/internal/schema/annotations.go    |  6 ++++++
 bundle/internal/schema/annotations.yml   |  9 +++++++++
 bundle/schema/jsonschema.json            | 13 +++++++++++--
 libs/jsonschema/extension.go             |  4 ++++
 libs/jsonschema/from_type.go             |  8 ++------
 libs/jsonschema/from_type_test.go        |  9 ++++-----
 libs/jsonschema/schema.go                |  5 +++++
 10 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index cbae43f2f..670a3d270 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -145,7 +145,10 @@ jobs:
           go run main.go bundle schema > schema.json

           # Add markdownDescription keyword to ajv
-          echo "module.exports=function(a){a.addKeyword('markdownDescription')}" >> keywords.js
+          echo "module.exports = function(a) {
+            a.addKeyword('markdownDescription');
+            a.addKeyword('deprecationMessage');
+          }" >> keywords.js

           for file in ./bundle/internal/schema/testdata/pass/*.yml; do
             ajv test -s schema.json -d $file --valid -c=./keywords.js
diff --git a/bundle/config/root.go b/bundle/config/root.go
index b974bcec5..d44e25fa2 100644
--- a/bundle/config/root.go
+++ b/bundle/config/root.go
@@ -52,7 +52,7 @@ type Root struct {
 	Targets map[string]*Target `json:"targets,omitempty"`

 	// DEPRECATED. 
Left for backward compatibility with Targets - Environments map[string]*Target `json:"environments,omitempty" bundle:"deprecated"` + Environments map[string]*Target `json:"environments,omitempty"` // Sync section specifies options for files synchronization Sync Sync `json:"sync,omitempty"` diff --git a/bundle/internal/annotation/descriptor.go b/bundle/internal/annotation/descriptor.go index 26c1a0b06..afcc0ce9f 100644 --- a/bundle/internal/annotation/descriptor.go +++ b/bundle/internal/annotation/descriptor.go @@ -7,6 +7,7 @@ type Descriptor struct { Default any `json:"default,omitempty"` Enum []any `json:"enum,omitempty"` MarkdownExamples string `json:"markdown_examples,omitempty"` + DeprecationMessage string `json:"deprecation_message,omitempty"` } const Placeholder = "PLACEHOLDER" diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index ee3c25ca1..86ffbce73 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -127,6 +127,12 @@ func assignAnnotation(s *jsonschema.Schema, a annotation.Descriptor) { if a.Default != nil { s.Default = a.Default } + + if a.DeprecationMessage != "" { + s.Deprecated = true + s.DeprecationMessage = a.DeprecationMessage + } + s.MarkdownDescription = convertLinksToAbsoluteUrl(a.MarkdownDescription) s.Title = a.Title s.Enum = a.Enum diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index e658f6e53..44e10c13a 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -61,6 +61,8 @@ github.com/databricks/cli/bundle/config.Experimental: "pydabs": "description": |- The PyDABs configuration. + "deprecation_message": |- + Deprecated: please use python instead "python": "description": |- Configures loading of Python code defined with 'databricks-bundles' package. @@ -220,6 +222,11 @@ github.com/databricks/cli/bundle/config.Root: The bundle attributes when deploying to this target. "markdown_description": |- The bundle attributes when deploying to this target, + "environments": + "description": |- + PLACEHOLDER + "deprecation_message": |- + Deprecated: please use targets instead "experimental": "description": |- Defines attributes for experimental features. @@ -308,6 +315,8 @@ github.com/databricks/cli/bundle/config.Target: "compute_id": "description": |- Deprecated. The ID of the compute to use for this target. + "deprecation_message": |- + Deprecated: please use cluster_id instead "default": "description": |- Whether this target is the default target. diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index edc651d5f..5731f72fd 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -1217,7 +1217,9 @@ "properties": { "pydabs": { "description": "The PyDABs configuration.", - "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs" + "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs", + "deprecationMessage": "Deprecated: please use python instead", + "deprecated": true }, "python": { "description": "Configures loading of Python code defined with 'databricks-bundles' package.", @@ -1506,7 +1508,9 @@ }, "compute_id": { "description": "Deprecated. 
The ID of the compute to use for this target.", - "$ref": "#/$defs/string" + "$ref": "#/$defs/string", + "deprecationMessage": "Deprecated: please use cluster_id instead", + "deprecated": true }, "default": { "description": "Whether this target is the default target.", @@ -7409,6 +7413,11 @@ "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Bundle", "markdownDescription": "The bundle attributes when deploying to this target," }, + "environments": { + "$ref": "#/$defs/map/github.com/databricks/cli/bundle/config.Target", + "deprecationMessage": "Deprecated: please use targets instead", + "deprecated": true + }, "experimental": { "description": "Defines attributes for experimental features.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Experimental" diff --git a/libs/jsonschema/extension.go b/libs/jsonschema/extension.go index 9badf86a5..e3cae1a7f 100644 --- a/libs/jsonschema/extension.go +++ b/libs/jsonschema/extension.go @@ -40,4 +40,8 @@ type Extension struct { // https://code.visualstudio.com/docs/languages/json#_use-rich-formatting-in-hovers // Also it can be used in documentation generation. MarkdownDescription string `json:"markdownDescription,omitempty"` + + // This field is not in the JSON schema spec, but it is supported in VSCode + // It is used to provide a warning for deprectated fields + DeprecationMessage string `json:"deprecationMessage,omitempty"` } diff --git a/libs/jsonschema/from_type.go b/libs/jsonschema/from_type.go index ce25cb023..6e0ba7b26 100644 --- a/libs/jsonschema/from_type.go +++ b/libs/jsonschema/from_type.go @@ -18,10 +18,6 @@ var skipTags = []string{ // Annotation for internal bundle fields that should not be exposed to customers. // Fields can be tagged as "internal" to remove them from the generated schema. "internal", - - // Annotation for bundle fields that have been deprecated. - // Fields tagged as "deprecated" are omitted from the generated schema. - "deprecated", } type constructor struct { @@ -259,8 +255,8 @@ func (c *constructor) fromTypeStruct(typ reflect.Type) (Schema, error) { structFields := getStructFields(typ) for _, structField := range structFields { bundleTags := strings.Split(structField.Tag.Get("bundle"), ",") - // Fields marked as "readonly", "internal" or "deprecated" are skipped - // while generating the schema + // Fields marked as "readonly" or "internal" are skipped while generating + // the schema skip := false for _, tag := range skipTags { if slices.Contains(bundleTags, tag) { diff --git a/libs/jsonschema/from_type_test.go b/libs/jsonschema/from_type_test.go index cdfdcfd10..ae9ff54b1 100644 --- a/libs/jsonschema/from_type_test.go +++ b/libs/jsonschema/from_type_test.go @@ -17,11 +17,10 @@ func TestFromTypeBasic(t *testing.T) { TriplePointer ***int `json:"triple_pointer,omitempty"` // These fields should be ignored in the resulting schema. 
- NotAnnotated string - DashedTag string `json:"-"` - InternalTagged string `json:"internal_tagged" bundle:"internal"` - DeprecatedTagged string `json:"deprecated_tagged" bundle:"deprecated"` - ReadOnlyTagged string `json:"readonly_tagged" bundle:"readonly"` + NotAnnotated string + DashedTag string `json:"-"` + InternalTagged string `json:"internal_tagged" bundle:"internal"` + ReadOnlyTagged string `json:"readonly_tagged" bundle:"readonly"` } strRef := "#/$defs/string" diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index 85f6a0328..6abeacfa4 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -80,6 +80,11 @@ type Schema struct { // Examples of the value for properties in the schema. // https://json-schema.org/understanding-json-schema/reference/annotations Examples any `json:"examples,omitempty"` + + // A boolean that indicates the field should not be used and may be removed + // in the future. + // https://json-schema.org/understanding-json-schema/reference/annotations + Deprecated bool `json:"deprecated,omitempty"` } // Default value defined in a JSON Schema, represented as a string. From 6ae353d84bf5f2a824f395a0bd56a20af634d3ca Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 5 Mar 2025 15:19:33 +0100 Subject: [PATCH 3/4] Use schema field for pipeline in builtin template (#2347) ## Changes The `schema` field implies the lifecycle of tables is no longer tied to the lifecycle of the pipeline, as was the case with the `target` field. More information about using the "catalog" and "schema" properties can be found here: https://docs.databricks.com/en/delta-live-tables/target-schema.html ## Tests n/a --------- Co-authored-by: Lennart Kats (databricks) --- .../default-python/classic/out.compare-vs-serverless.diff | 2 +- .../my_default_python/resources/my_default_python.pipeline.yml | 2 +- .../default-python/serverless-customcatalog/output.txt | 2 +- .../my_default_python/resources/my_default_python.pipeline.yml | 2 +- integration/bundle/testdata/default_python/bundle_summary.txt | 2 +- .../resources/{{.project_name}}.pipeline.yml.tmpl | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff index 6890badf0..8fdcb8359 100644 --- a/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff +++ b/acceptance/bundle/templates/default-python/classic/out.compare-vs-serverless.diff @@ -48,7 +48,7 @@ - catalog: main + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name - target: my_default_python_${bundle.target} + schema: my_default_python_${bundle.target} - serverless: true libraries: - notebook: diff --git a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml index 4176f765d..d07d4fa6c 100644 --- a/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml +++ b/acceptance/bundle/templates/default-python/classic/output/my_default_python/resources/my_default_python.pipeline.yml @@ -5,7 +5,7 @@ resources: name: my_default_python_pipeline ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: # catalog: catalog_name - 
target: my_default_python_${bundle.target} + schema: my_default_python_${bundle.target} libraries: - notebook: path: ../src/dlt_pipeline.ipynb diff --git a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt index 30726013b..a6a92dfd4 100644 --- a/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt +++ b/acceptance/bundle/templates/default-python/serverless-customcatalog/output.txt @@ -18,5 +18,5 @@ See also the documentation at https://docs.databricks.com/dev-tools/bundles/inde - ## Catalog is required for serverless compute - catalog: main + catalog: customcatalog - target: my_default_python_${bundle.target} + schema: my_default_python_${bundle.target} serverless: true diff --git a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml index 6dac62ded..f8a0521db 100644 --- a/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml +++ b/acceptance/bundle/templates/default-python/serverless/output/my_default_python/resources/my_default_python.pipeline.yml @@ -5,7 +5,7 @@ resources: name: my_default_python_pipeline ## Catalog is required for serverless compute catalog: main - target: my_default_python_${bundle.target} + schema: my_default_python_${bundle.target} serverless: true libraries: - notebook: diff --git a/integration/bundle/testdata/default_python/bundle_summary.txt b/integration/bundle/testdata/default_python/bundle_summary.txt index 968009759..0b4cbb181 100644 --- a/integration/bundle/testdata/default_python/bundle_summary.txt +++ b/integration/bundle/testdata/default_python/bundle_summary.txt @@ -164,7 +164,7 @@ } ], "name": "[dev [USERNAME]] project_name_$UNIQUE_PRJ_pipeline", - "target": "project_name_$UNIQUE_PRJ_dev", + "schema": "project_name_$UNIQUE_PRJ_dev", "url": "[DATABRICKS_URL]/pipelines/[UUID]?o=[NUMID]" } } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl index 024c1ab15..2e858d681 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl @@ -13,7 +13,7 @@ resources: {{- else}} catalog: {{default_catalog}} {{- end}} - target: {{.project_name}}_${bundle.target} + schema: {{.project_name}}_${bundle.target} {{- if $with_serverless }} serverless: true {{- end}} From a24a7f573844573dd9ea6fbca4cb79cb8da1fa58 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 5 Mar 2025 19:50:59 +0530 Subject: [PATCH 4/4] Remove omitempty tag for exit code and execution time in telemetry (#2433) --- libs/telemetry/protos/databricks_cli_log.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/telemetry/protos/databricks_cli_log.go b/libs/telemetry/protos/databricks_cli_log.go index 9e4e59596..64baa6b38 100644 --- a/libs/telemetry/protos/databricks_cli_log.go +++ b/libs/telemetry/protos/databricks_cli_log.go @@ -24,10 +24,12 @@ type ExecutionContext struct { 
FromWebTerminal bool `json:"from_web_terminal,omitempty"` // Time taken for the CLI command to execute. - ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. + ExecutionTimeMs int64 `json:"execution_time_ms"` // Exit code of the CLI command. - ExitCode int64 `json:"exit_code,omitempty"` + // We want to serialize the zero value as well so the omitempty tag is not set. + ExitCode int64 `json:"exit_code"` } type CliTestEvent struct {