From 0ad790e46863f947df5cdb0425d0aa217ae781e3 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 5 Dec 2024 11:13:13 +0100 Subject: [PATCH] Properly read Git metadata when running inside workspace (#1945) ## Changes Since there is no .git directory in Workspace file system, we need to make an API call to api/2.0/workspace/get-status?return_git_info=true to fetch git the root of the repo, current branch, commit and origin. Added new function FetchRepositoryInfo that either looks up and parses .git or calls remote API depending on env. Refactor Repository/View/FileSet to accept repository root rather than calculate it. This helps because: - Repository is currently created in multiple places and finding the repository root is becoming relatively expensive (API call needed). - Repository/FileSet/View do not have access to current Bundle which is where WorkplaceClient is stored. ## Tests - Tested manually by running "bundle validate --json" inside web terminal within Databricks env. - Added integration tests for the new API. --------- Co-authored-by: Andrew Nester Co-authored-by: Pieter Noordhuis --- bundle/bundle.go | 4 + bundle/bundle_read_only.go | 4 + bundle/config/mutator/load_git_details.go | 57 +++--- bundle/config/validate/files_to_sync_test.go | 1 + bundle/deploy/files/sync.go | 9 +- cmd/sync/sync.go | 32 +++- internal/git_fetch_test.go | 172 +++++++++++++++++++ libs/diag/diagnostic.go | 13 ++ libs/git/fileset.go | 10 +- libs/git/fileset_test.go | 24 ++- libs/git/info.go | 161 +++++++++++++++++ libs/git/repository.go | 14 +- libs/git/view.go | 8 +- libs/git/view_test.go | 30 ++-- libs/sync/snapshot_test.go | 12 +- libs/sync/sync.go | 11 +- libs/sync/sync_test.go | 6 +- 17 files changed, 475 insertions(+), 93 deletions(-) create mode 100644 internal/git_fetch_test.go create mode 100644 libs/git/info.go diff --git a/bundle/bundle.go b/bundle/bundle.go index 46710538a..76c87c24c 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -48,6 +48,10 @@ type Bundle struct { // Exclusively use this field for filesystem operations. SyncRoot vfs.Path + // Path to the root of git worktree containing the bundle. + // https://git-scm.com/docs/git-worktree + WorktreeRoot vfs.Path + // Config contains the bundle configuration. // It is loaded from the bundle configuration files and mutators may update it. Config config.Root diff --git a/bundle/bundle_read_only.go b/bundle/bundle_read_only.go index ceab95c0b..4bdd94e59 100644 --- a/bundle/bundle_read_only.go +++ b/bundle/bundle_read_only.go @@ -32,6 +32,10 @@ func (r ReadOnlyBundle) SyncRoot() vfs.Path { return r.b.SyncRoot } +func (r ReadOnlyBundle) WorktreeRoot() vfs.Path { + return r.b.WorktreeRoot +} + func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient { return r.b.WorkspaceClient() } diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 77558d9b5..82255552a 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -7,7 +7,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/git" - "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/vfs" ) type loadGitDetails struct{} @@ -21,45 +21,40 @@ func (m *loadGitDetails) Name() string { } func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - // Load relevant git repository - repo, err := git.NewRepository(b.BundleRoot) + var diags diag.Diagnostics + info, err := git.FetchRepositoryInfo(ctx, b.BundleRoot.Native(), b.WorkspaceClient()) if err != nil { - return diag.FromErr(err) + diags = append(diags, diag.WarningFromErr(err)...) } - // Read branch name of current checkout - branch, err := repo.CurrentBranch() - if err == nil { - b.Config.Bundle.Git.ActualBranch = branch - if b.Config.Bundle.Git.Branch == "" { - // Only load branch if there's no user defined value - b.Config.Bundle.Git.Inferred = true - b.Config.Bundle.Git.Branch = branch - } + if info.WorktreeRoot == "" { + b.WorktreeRoot = b.BundleRoot } else { - log.Warnf(ctx, "failed to load current branch: %s", err) + b.WorktreeRoot = vfs.MustNew(info.WorktreeRoot) + } + + b.Config.Bundle.Git.ActualBranch = info.CurrentBranch + if b.Config.Bundle.Git.Branch == "" { + // Only load branch if there's no user defined value + b.Config.Bundle.Git.Inferred = true + b.Config.Bundle.Git.Branch = info.CurrentBranch } // load commit hash if undefined if b.Config.Bundle.Git.Commit == "" { - commit, err := repo.LatestCommit() - if err != nil { - log.Warnf(ctx, "failed to load latest commit: %s", err) - } else { - b.Config.Bundle.Git.Commit = commit - } - } - // load origin url if undefined - if b.Config.Bundle.Git.OriginURL == "" { - remoteUrl := repo.OriginUrl() - b.Config.Bundle.Git.OriginURL = remoteUrl + b.Config.Bundle.Git.Commit = info.LatestCommit } - // repo.Root() returns the absolute path of the repo - relBundlePath, err := filepath.Rel(repo.Root(), b.BundleRoot.Native()) - if err != nil { - return diag.FromErr(err) + // load origin url if undefined + if b.Config.Bundle.Git.OriginURL == "" { + b.Config.Bundle.Git.OriginURL = info.OriginURL } - b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath) - return nil + + relBundlePath, err := filepath.Rel(b.WorktreeRoot.Native(), b.BundleRoot.Native()) + if err != nil { + diags = append(diags, diag.FromErr(err)...) + } else { + b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath) + } + return diags } diff --git a/bundle/config/validate/files_to_sync_test.go b/bundle/config/validate/files_to_sync_test.go index 2a598fa72..30af9026d 100644 --- a/bundle/config/validate/files_to_sync_test.go +++ b/bundle/config/validate/files_to_sync_test.go @@ -44,6 +44,7 @@ func setupBundleForFilesToSyncTest(t *testing.T) *bundle.Bundle { BundleRoot: vfs.MustNew(dir), SyncRootPath: dir, SyncRoot: vfs.MustNew(dir), + WorktreeRoot: vfs.MustNew(dir), Config: config.Root{ Bundle: config.Bundle{ Target: "default", diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index 347ed3079..e3abc5fef 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -28,10 +28,11 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalRoot: rb.SyncRoot(), - Paths: rb.Config().Sync.Paths, - Include: includes, - Exclude: rb.Config().Sync.Exclude, + WorktreeRoot: rb.WorktreeRoot(), + LocalRoot: rb.SyncRoot(), + Paths: rb.Config().Sync.Paths, + Include: includes, + Exclude: rb.Config().Sync.Exclude, RemotePath: rb.Config().Workspace.FilePath, Host: rb.WorkspaceClient().Config.Host, diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index 2092d9e33..6d722fb08 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -12,6 +12,8 @@ import ( "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/flags" + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/sync" "github.com/databricks/cli/libs/vfs" "github.com/spf13/cobra" @@ -37,6 +39,7 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, args []string, b * opts.Full = f.full opts.PollInterval = f.interval + opts.WorktreeRoot = b.WorktreeRoot return opts, nil } @@ -60,11 +63,30 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn } } + ctx := cmd.Context() + client := root.WorkspaceClient(ctx) + + localRoot := vfs.MustNew(args[0]) + info, err := git.FetchRepositoryInfo(ctx, localRoot.Native(), client) + + if err != nil { + log.Warnf(ctx, "Failed to read git info: %s", err) + } + + var worktreeRoot vfs.Path + + if info.WorktreeRoot == "" { + worktreeRoot = localRoot + } else { + worktreeRoot = vfs.MustNew(info.WorktreeRoot) + } + opts := sync.SyncOptions{ - LocalRoot: vfs.MustNew(args[0]), - Paths: []string{"."}, - Include: nil, - Exclude: nil, + WorktreeRoot: worktreeRoot, + LocalRoot: localRoot, + Paths: []string{"."}, + Include: nil, + Exclude: nil, RemotePath: args[1], Full: f.full, @@ -75,7 +97,7 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn // The sync code will automatically create this directory if it doesn't // exist and add it to the `.gitignore` file in the root. SnapshotBasePath: filepath.Join(args[0], ".databricks"), - WorkspaceClient: root.WorkspaceClient(cmd.Context()), + WorkspaceClient: client, OutputHandler: outputHandler, } diff --git a/internal/git_fetch_test.go b/internal/git_fetch_test.go new file mode 100644 index 000000000..5dab6be76 --- /dev/null +++ b/internal/git_fetch_test.go @@ -0,0 +1,172 @@ +package internal + +import ( + "os" + "os/exec" + "path" + "path/filepath" + "testing" + + "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/git" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const examplesRepoUrl = "https://github.com/databricks/bundle-examples" +const examplesRepoProvider = "gitHub" + +func assertFullGitInfo(t *testing.T, expectedRoot string, info git.RepositoryInfo) { + assert.Equal(t, "main", info.CurrentBranch) + assert.NotEmpty(t, info.LatestCommit) + assert.Equal(t, examplesRepoUrl, info.OriginURL) + assert.Equal(t, expectedRoot, info.WorktreeRoot) +} + +func assertEmptyGitInfo(t *testing.T, info git.RepositoryInfo) { + assertSparseGitInfo(t, "", info) +} + +func assertSparseGitInfo(t *testing.T, expectedRoot string, info git.RepositoryInfo) { + assert.Equal(t, "", info.CurrentBranch) + assert.Equal(t, "", info.LatestCommit) + assert.Equal(t, "", info.OriginURL) + assert.Equal(t, expectedRoot, info.WorktreeRoot) +} + +func TestAccFetchRepositoryInfoAPI_FromRepo(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + me, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + targetPath := acc.RandomName(path.Join("/Workspace/Users", me.UserName, "/testing-clone-bundle-examples-")) + stdout, stderr := RequireSuccessfulRun(t, "repos", "create", examplesRepoUrl, examplesRepoProvider, "--path", targetPath) + t.Cleanup(func() { + RequireSuccessfulRun(t, "repos", "delete", targetPath) + }) + + assert.Empty(t, stderr.String()) + assert.NotEmpty(t, stdout.String()) + ctx = dbr.MockRuntime(ctx, true) + + for _, inputPath := range []string{ + path.Join(targetPath, "knowledge_base/dashboard_nyc_taxi"), + targetPath, + } { + t.Run(inputPath, func(t *testing.T) { + info, err := git.FetchRepositoryInfo(ctx, inputPath, wt.W) + assert.NoError(t, err) + assertFullGitInfo(t, targetPath, info) + }) + } +} + +func TestAccFetchRepositoryInfoAPI_FromNonRepo(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + me, err := wt.W.CurrentUser.Me(ctx) + require.NoError(t, err) + + rootPath := acc.RandomName(path.Join("/Workspace/Users", me.UserName, "testing-nonrepo-")) + _, stderr := RequireSuccessfulRun(t, "workspace", "mkdirs", path.Join(rootPath, "a/b/c")) + t.Cleanup(func() { + RequireSuccessfulRun(t, "workspace", "delete", "--recursive", rootPath) + }) + + assert.Empty(t, stderr.String()) + ctx = dbr.MockRuntime(ctx, true) + + tests := []struct { + input string + msg string + }{ + { + input: path.Join(rootPath, "a/b/c"), + msg: "", + }, + { + input: rootPath, + msg: "", + }, + { + input: path.Join(rootPath, "/non-existent"), + msg: "doesn't exist", + }, + } + + for _, test := range tests { + t.Run(test.input+" <==> "+test.msg, func(t *testing.T) { + info, err := git.FetchRepositoryInfo(ctx, test.input, wt.W) + if test.msg == "" { + assert.NoError(t, err) + } else { + assert.Error(t, err) + assert.Contains(t, err.Error(), test.msg) + } + assertEmptyGitInfo(t, info) + }) + } +} + +func TestAccFetchRepositoryInfoDotGit_FromGitRepo(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + repo := cloneRepoLocally(t, examplesRepoUrl) + + for _, inputPath := range []string{ + filepath.Join(repo, "knowledge_base/dashboard_nyc_taxi"), + repo, + } { + t.Run(inputPath, func(t *testing.T) { + info, err := git.FetchRepositoryInfo(ctx, inputPath, wt.W) + assert.NoError(t, err) + assertFullGitInfo(t, repo, info) + }) + } +} + +func cloneRepoLocally(t *testing.T, repoUrl string) string { + tempDir := t.TempDir() + localRoot := filepath.Join(tempDir, "repo") + + cmd := exec.Command("git", "clone", "--depth=1", examplesRepoUrl, localRoot) + err := cmd.Run() + require.NoError(t, err) + return localRoot +} + +func TestAccFetchRepositoryInfoDotGit_FromNonGitRepo(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + tempDir := t.TempDir() + root := filepath.Join(tempDir, "repo") + require.NoError(t, os.MkdirAll(filepath.Join(root, "a/b/c"), 0700)) + + tests := []string{ + filepath.Join(root, "a/b/c"), + root, + filepath.Join(root, "/non-existent"), + } + + for _, input := range tests { + t.Run(input, func(t *testing.T) { + info, err := git.FetchRepositoryInfo(ctx, input, wt.W) + assert.NoError(t, err) + assertEmptyGitInfo(t, info) + }) + } +} + +func TestAccFetchRepositoryInfoDotGit_FromBrokenGitRepo(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + tempDir := t.TempDir() + root := filepath.Join(tempDir, "repo") + path := filepath.Join(root, "a/b/c") + require.NoError(t, os.MkdirAll(path, 0700)) + require.NoError(t, os.WriteFile(filepath.Join(root, ".git"), []byte(""), 0000)) + + info, err := git.FetchRepositoryInfo(ctx, path, wt.W) + assert.NoError(t, err) + assertSparseGitInfo(t, root, info) +} diff --git a/libs/diag/diagnostic.go b/libs/diag/diagnostic.go index 254ecbd7d..a4f8c7b6b 100644 --- a/libs/diag/diagnostic.go +++ b/libs/diag/diagnostic.go @@ -53,6 +53,19 @@ func FromErr(err error) Diagnostics { } } +// FromErr returns a new warning diagnostic from the specified error, if any. +func WarningFromErr(err error) Diagnostics { + if err == nil { + return nil + } + return []Diagnostic{ + { + Severity: Warning, + Summary: err.Error(), + }, + } +} + // Warningf creates a new warning diagnostic. func Warningf(format string, args ...any) Diagnostics { return []Diagnostic{ diff --git a/libs/git/fileset.go b/libs/git/fileset.go index bb1cd4692..8391548c9 100644 --- a/libs/git/fileset.go +++ b/libs/git/fileset.go @@ -13,10 +13,10 @@ type FileSet struct { view *View } -// NewFileSet returns [FileSet] for the Git repository located at `root`. -func NewFileSet(root vfs.Path, paths ...[]string) (*FileSet, error) { +// NewFileSet returns [FileSet] for the directory `root` which is contained within Git worktree located at `worktreeRoot`. +func NewFileSet(worktreeRoot, root vfs.Path, paths ...[]string) (*FileSet, error) { fs := fileset.New(root, paths...) - v, err := NewView(root) + v, err := NewView(worktreeRoot, root) if err != nil { return nil, err } @@ -27,6 +27,10 @@ func NewFileSet(root vfs.Path, paths ...[]string) (*FileSet, error) { }, nil } +func NewFileSetAtRoot(root vfs.Path, paths ...[]string) (*FileSet, error) { + return NewFileSet(root, root, paths...) +} + func (f *FileSet) IgnoreFile(file string) (bool, error) { return f.view.IgnoreFile(file) } diff --git a/libs/git/fileset_test.go b/libs/git/fileset_test.go index 37f3611d1..f4fd931fd 100644 --- a/libs/git/fileset_test.go +++ b/libs/git/fileset_test.go @@ -12,8 +12,8 @@ import ( "github.com/stretchr/testify/require" ) -func testFileSetAll(t *testing.T, root string) { - fileSet, err := NewFileSet(vfs.MustNew(root)) +func testFileSetAll(t *testing.T, worktreeRoot, root string) { + fileSet, err := NewFileSet(vfs.MustNew(worktreeRoot), vfs.MustNew(root)) require.NoError(t, err) files, err := fileSet.Files() require.NoError(t, err) @@ -24,18 +24,28 @@ func testFileSetAll(t *testing.T, root string) { } func TestFileSetListAllInRepo(t *testing.T) { - testFileSetAll(t, "./testdata") + testFileSetAll(t, "./testdata", "./testdata") +} + +func TestFileSetListAllInRepoDifferentRoot(t *testing.T) { + testFileSetAll(t, ".", "./testdata") } func TestFileSetListAllInTempDir(t *testing.T) { - testFileSetAll(t, copyTestdata(t, "./testdata")) + dir := copyTestdata(t, "./testdata") + testFileSetAll(t, dir, dir) +} + +func TestFileSetListAllInTempDirDifferentRoot(t *testing.T) { + dir := copyTestdata(t, "./testdata") + testFileSetAll(t, filepath.Dir(dir), dir) } func TestFileSetNonCleanRoot(t *testing.T) { // Test what happens if the root directory can be simplified. // Path simplification is done by most filepath functions. // This should yield the same result as above test. - fileSet, err := NewFileSet(vfs.MustNew("./testdata/../testdata")) + fileSet, err := NewFileSetAtRoot(vfs.MustNew("./testdata/../testdata")) require.NoError(t, err) files, err := fileSet.Files() require.NoError(t, err) @@ -44,7 +54,7 @@ func TestFileSetNonCleanRoot(t *testing.T) { func TestFileSetAddsCacheDirToGitIgnore(t *testing.T) { projectDir := t.TempDir() - fileSet, err := NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) fileSet.EnsureValidGitIgnoreExists() @@ -59,7 +69,7 @@ func TestFileSetDoesNotCacheDirToGitIgnoreIfAlreadyPresent(t *testing.T) { projectDir := t.TempDir() gitIgnorePath := filepath.Join(projectDir, ".gitignore") - fileSet, err := NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644) require.NoError(t, err) diff --git a/libs/git/info.go b/libs/git/info.go new file mode 100644 index 000000000..13c298113 --- /dev/null +++ b/libs/git/info.go @@ -0,0 +1,161 @@ +package git + +import ( + "context" + "errors" + "io/fs" + "net/http" + "os" + "path" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/dbr" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/client" +) + +type RepositoryInfo struct { + // Various metadata about the repo. Each could be "" if it could not be read. No error is returned for such case. + OriginURL string + LatestCommit string + CurrentBranch string + + // Absolute path to determined worktree root or "" if worktree root could not be determined. + WorktreeRoot string +} + +type gitInfo struct { + Branch string `json:"branch"` + HeadCommitID string `json:"head_commit_id"` + Path string `json:"path"` + URL string `json:"url"` +} + +type response struct { + GitInfo *gitInfo `json:"git_info,omitempty"` +} + +// Fetch repository information either by quering .git or by fetching it from API (for dabs-in-workspace case). +// - In case we could not find git repository, all string fields of RepositoryInfo will be "" and err will be nil. +// - If there were any errors when trying to determine git root (e.g. API call returned an error or there were permission issues +// reading the file system), all strings fields of RepositoryInfo will be "" and err will be non-nil. +// - If we could determine git worktree root but there were errors when reading metadata (origin, branch, commit), those errors +// will be logged as warnings, RepositoryInfo is guaranteed to have non-empty WorktreeRoot and other fields on best effort basis. +// - In successful case, all fields are set to proper git repository metadata. +func FetchRepositoryInfo(ctx context.Context, path string, w *databricks.WorkspaceClient) (RepositoryInfo, error) { + if strings.HasPrefix(path, "/Workspace/") && dbr.RunsOnRuntime(ctx) { + return fetchRepositoryInfoAPI(ctx, path, w) + } else { + return fetchRepositoryInfoDotGit(ctx, path) + } +} + +func fetchRepositoryInfoAPI(ctx context.Context, path string, w *databricks.WorkspaceClient) (RepositoryInfo, error) { + result := RepositoryInfo{} + + apiClient, err := client.New(w.Config) + if err != nil { + return result, err + } + + var response response + const apiEndpoint = "/api/2.0/workspace/get-status" + + err = apiClient.Do( + ctx, + http.MethodGet, + apiEndpoint, + nil, + map[string]string{ + "path": path, + "return_git_info": "true", + }, + &response, + ) + + if err != nil { + return result, err + } + + // Check if GitInfo is present and extract relevant fields + gi := response.GitInfo + if gi != nil { + fixedPath := ensureWorkspacePrefix(gi.Path) + result.OriginURL = gi.URL + result.LatestCommit = gi.HeadCommitID + result.CurrentBranch = gi.Branch + result.WorktreeRoot = fixedPath + } else { + log.Warnf(ctx, "Failed to load git info from %s", apiEndpoint) + } + + return result, nil +} + +func ensureWorkspacePrefix(p string) string { + if !strings.HasPrefix(p, "/Workspace/") { + return path.Join("/Workspace", p) + } + return p +} + +func fetchRepositoryInfoDotGit(ctx context.Context, path string) (RepositoryInfo, error) { + result := RepositoryInfo{} + + rootDir, err := findLeafInTree(path, GitDirectoryName) + if rootDir == "" { + return result, err + } + + result.WorktreeRoot = rootDir + + repo, err := NewRepository(vfs.MustNew(rootDir)) + if err != nil { + log.Warnf(ctx, "failed to read .git: %s", err) + + // return early since operations below won't work + return result, nil + } + + result.OriginURL = repo.OriginUrl() + + result.CurrentBranch, err = repo.CurrentBranch() + if err != nil { + log.Warnf(ctx, "failed to load current branch: %s", err) + } + + result.LatestCommit, err = repo.LatestCommit() + if err != nil { + log.Warnf(ctx, "failed to load latest commit: %s", err) + } + + return result, nil +} + +func findLeafInTree(p string, leafName string) (string, error) { + var err error + for i := 0; i < 10000; i++ { + _, err = os.Stat(filepath.Join(p, leafName)) + + if err == nil { + // Found [leafName] in p + return p, nil + } + + // ErrNotExist means we continue traversal up the tree. + if errors.Is(err, fs.ErrNotExist) { + parent := filepath.Dir(p) + if parent == p { + return "", nil + } + p = parent + continue + } + break + } + + return "", err +} diff --git a/libs/git/repository.go b/libs/git/repository.go index f0e9e1eb2..b94297ab9 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -1,9 +1,7 @@ package git import ( - "errors" "fmt" - "io/fs" "net/url" "path" "path/filepath" @@ -204,17 +202,7 @@ func (r *Repository) Ignore(relPath string) (bool, error) { return false, nil } -func NewRepository(path vfs.Path) (*Repository, error) { - rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName) - if err != nil { - if !errors.Is(err, fs.ErrNotExist) { - return nil, err - } - // Cannot find `.git` directory. - // Treat the specified path as a potential repository root checkout. - rootDir = path - } - +func NewRepository(rootDir vfs.Path) (*Repository, error) { // Derive $GIT_DIR and $GIT_COMMON_DIR paths if this is a real repository. // If it isn't a real repository, they'll point to the (non-existent) `.git` directory. gitDir, gitCommonDir, err := resolveGitDirs(rootDir) diff --git a/libs/git/view.go b/libs/git/view.go index 2d2e39a60..2eaba1f8b 100644 --- a/libs/git/view.go +++ b/libs/git/view.go @@ -72,8 +72,8 @@ func (v *View) IgnoreDirectory(dir string) (bool, error) { return v.Ignore(dir + "/") } -func NewView(root vfs.Path) (*View, error) { - repo, err := NewRepository(root) +func NewView(worktreeRoot, root vfs.Path) (*View, error) { + repo, err := NewRepository(worktreeRoot) if err != nil { return nil, err } @@ -96,6 +96,10 @@ func NewView(root vfs.Path) (*View, error) { }, nil } +func NewViewAtRoot(root vfs.Path) (*View, error) { + return NewView(root, root) +} + func (v *View) EnsureValidGitIgnoreExists() error { ign, err := v.IgnoreDirectory(".databricks") if err != nil { diff --git a/libs/git/view_test.go b/libs/git/view_test.go index 76fba3458..06f6f9419 100644 --- a/libs/git/view_test.go +++ b/libs/git/view_test.go @@ -90,19 +90,19 @@ func testViewAtRoot(t *testing.T, tv testView) { } func TestViewRootInBricksRepo(t *testing.T) { - v, err := NewView(vfs.MustNew("./testdata")) + v, err := NewViewAtRoot(vfs.MustNew("./testdata")) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } func TestViewRootInTempRepo(t *testing.T) { - v, err := NewView(vfs.MustNew(createFakeRepo(t, "testdata"))) + v, err := NewViewAtRoot(vfs.MustNew(createFakeRepo(t, "testdata"))) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } func TestViewRootInTempDir(t *testing.T) { - v, err := NewView(vfs.MustNew(copyTestdata(t, "testdata"))) + v, err := NewViewAtRoot(vfs.MustNew(copyTestdata(t, "testdata"))) require.NoError(t, err) testViewAtRoot(t, testView{t, v}) } @@ -125,20 +125,21 @@ func testViewAtA(t *testing.T, tv testView) { } func TestViewAInBricksRepo(t *testing.T) { - v, err := NewView(vfs.MustNew("./testdata/a")) + v, err := NewView(vfs.MustNew("."), vfs.MustNew("./testdata/a")) require.NoError(t, err) testViewAtA(t, testView{t, v}) } func TestViewAInTempRepo(t *testing.T) { - v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a"))) + repo := createFakeRepo(t, "testdata") + v, err := NewView(vfs.MustNew(repo), vfs.MustNew(filepath.Join(repo, "a"))) require.NoError(t, err) testViewAtA(t, testView{t, v}) } func TestViewAInTempDir(t *testing.T) { // Since this is not a fake repo it should not traverse up the tree. - v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a"))) + v, err := NewViewAtRoot(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a"))) require.NoError(t, err) tv := testView{t, v} @@ -175,20 +176,21 @@ func testViewAtAB(t *testing.T, tv testView) { } func TestViewABInBricksRepo(t *testing.T) { - v, err := NewView(vfs.MustNew("./testdata/a/b")) + v, err := NewView(vfs.MustNew("."), vfs.MustNew("./testdata/a/b")) require.NoError(t, err) testViewAtAB(t, testView{t, v}) } func TestViewABInTempRepo(t *testing.T) { - v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a", "b"))) + repo := createFakeRepo(t, "testdata") + v, err := NewView(vfs.MustNew(repo), vfs.MustNew(filepath.Join(repo, "a", "b"))) require.NoError(t, err) testViewAtAB(t, testView{t, v}) } func TestViewABInTempDir(t *testing.T) { // Since this is not a fake repo it should not traverse up the tree. - v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b"))) + v, err := NewViewAtRoot(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b"))) tv := testView{t, v} require.NoError(t, err) @@ -215,7 +217,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T) // Since root .gitignore already has .databricks, there should be no edits // to root .gitignore - v, err := NewView(vfs.MustNew(repoPath)) + v, err := NewViewAtRoot(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -235,7 +237,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T // Since root .gitignore already has .databricks, there should be no edits // to a/.gitignore - v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a"))) + v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a"))) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -253,7 +255,7 @@ func TestViewAddsGitignoreWithCacheDir(t *testing.T) { assert.NoError(t, err) // Since root .gitignore was deleted, new view adds .databricks to root .gitignore - v, err := NewView(vfs.MustNew(repoPath)) + v, err := NewViewAtRoot(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -271,7 +273,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) { require.NoError(t, err) // Since root .gitignore was deleted, new view adds .databricks to a/.gitignore - v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a"))) + v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a"))) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() @@ -288,7 +290,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) { func TestViewAlwaysIgnoresCacheDir(t *testing.T) { repoPath := createFakeRepo(t, "testdata") - v, err := NewView(vfs.MustNew(repoPath)) + v, err := NewViewAtRoot(vfs.MustNew(repoPath)) require.NoError(t, err) err = v.EnsureValidGitIgnoreExists() diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index b7830406d..eef526e58 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -30,7 +30,7 @@ func TestDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -94,7 +94,7 @@ func TestSymlinkDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -125,7 +125,7 @@ func TestFolderDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -170,7 +170,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -245,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ @@ -282,7 +282,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { // Create temp project dir projectDir := t.TempDir() - fileSet, err := git.NewFileSet(vfs.MustNew(projectDir)) + fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir)) require.NoError(t, err) state := Snapshot{ SnapshotState: &SnapshotState{ diff --git a/libs/sync/sync.go b/libs/sync/sync.go index cc9c73944..6bd26f224 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -19,10 +19,11 @@ import ( type OutputHandler func(context.Context, <-chan Event) type SyncOptions struct { - LocalRoot vfs.Path - Paths []string - Include []string - Exclude []string + WorktreeRoot vfs.Path + LocalRoot vfs.Path + Paths []string + Include []string + Exclude []string RemotePath string @@ -62,7 +63,7 @@ type Sync struct { // New initializes and returns a new [Sync] instance. func New(ctx context.Context, opts SyncOptions) (*Sync, error) { - fileSet, err := git.NewFileSet(opts.LocalRoot, opts.Paths) + fileSet, err := git.NewFileSet(opts.WorktreeRoot, opts.LocalRoot, opts.Paths) if err != nil { return nil, err } diff --git a/libs/sync/sync_test.go b/libs/sync/sync_test.go index 2d800f466..6168dc217 100644 --- a/libs/sync/sync_test.go +++ b/libs/sync/sync_test.go @@ -37,7 +37,7 @@ func TestGetFileSet(t *testing.T) { dir := setupFiles(t) root := vfs.MustNew(dir) - fileSet, err := git.NewFileSet(root) + fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) err = fileSet.EnsureValidGitIgnoreExists() @@ -103,7 +103,7 @@ func TestRecursiveExclude(t *testing.T) { dir := setupFiles(t) root := vfs.MustNew(dir) - fileSet, err := git.NewFileSet(root) + fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) err = fileSet.EnsureValidGitIgnoreExists() @@ -133,7 +133,7 @@ func TestNegateExclude(t *testing.T) { dir := setupFiles(t) root := vfs.MustNew(dir) - fileSet, err := git.NewFileSet(root) + fileSet, err := git.NewFileSetAtRoot(root) require.NoError(t, err) err = fileSet.EnsureValidGitIgnoreExists()