Properly read git metadata when inside Workspace

Since there is no .git directory in the Workspace file system, we need to make
an API call (api/2.0/workspace/get-status?return_git_info=true) to fetch the
git checkout status (root of the repo, current branch, etc.).

Refactor Repository to accept repository root rather than calculate it.
This helps, because Repository is currently created in multiple places and
finding the repository root is expensive.
This commit is contained in:
Denis Bilenko 2024-11-27 14:29:18 +01:00
parent 00bd98f898
commit c3b02d9321
15 changed files with 217 additions and 83 deletions

View File

@ -48,6 +48,9 @@ type Bundle struct {
// Exclusively use this field for filesystem operations.
SyncRoot vfs.Path
// Path to root of git worktree
WorktreeRoot vfs.Path
// Config contains the bundle configuration.
// It is loaded from the bundle configuration files and mutators may update it.
Config config.Root

View File

@ -32,6 +32,10 @@ func (r ReadOnlyBundle) SyncRoot() vfs.Path {
return r.b.SyncRoot
}
// WorktreeRoot returns the WorktreeRoot field of the wrapped bundle,
// i.e. the path to the root of the git worktree.
func (r ReadOnlyBundle) WorktreeRoot() vfs.Path {
return r.b.WorktreeRoot
}
func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient {
return r.b.WorkspaceClient()
}

View File

@ -7,7 +7,6 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
)
type loadGitDetails struct{}
@ -21,38 +20,28 @@ func (m *loadGitDetails) Name() string {
}
func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
// Load relevant git repository
repo, err := git.NewRepository(b.BundleRoot)
info, err := git.FetchRepositoryInfo(ctx, b.BundleRoot, b.WorkspaceClient())
if err != nil {
return diag.FromErr(err)
}
// Read branch name of current checkout
branch, err := repo.CurrentBranch()
if err == nil {
b.Config.Bundle.Git.ActualBranch = branch
if b.Config.Bundle.Git.Branch == "" {
// Only load branch if there's no user defined value
b.Config.Bundle.Git.Inferred = true
b.Config.Bundle.Git.Branch = branch
}
} else {
log.Warnf(ctx, "failed to load current branch: %s", err)
b.WorktreeRoot = info.WorktreeRoot
b.Config.Bundle.Git.ActualBranch = info.CurrentBranch
if b.Config.Bundle.Git.Branch == "" {
// Only load branch if there's no user defined value
b.Config.Bundle.Git.Inferred = true
b.Config.Bundle.Git.Branch = info.CurrentBranch
}
// load commit hash if undefined
if b.Config.Bundle.Git.Commit == "" {
commit, err := repo.LatestCommit()
if err != nil {
log.Warnf(ctx, "failed to load latest commit: %s", err)
} else {
b.Config.Bundle.Git.Commit = commit
}
b.Config.Bundle.Git.Commit = info.LatestCommit
}
// load origin url if undefined
if b.Config.Bundle.Git.OriginURL == "" {
remoteUrl := repo.OriginUrl()
b.Config.Bundle.Git.OriginURL = remoteUrl
b.Config.Bundle.Git.OriginURL = info.OriginURL
}
// Compute relative path of the bundle root from the Git repo root.
@ -60,11 +49,12 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagn
if err != nil {
return diag.FromErr(err)
}
// repo.Root() returns the absolute path of the repo
relBundlePath, err := filepath.Rel(repo.Root(), absBundlePath)
relBundlePath, err := filepath.Rel(info.WorktreeRoot.Native(), absBundlePath)
if err != nil {
return diag.FromErr(err)
}
b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath)
return nil
}

View File

@ -44,6 +44,7 @@ func setupBundleForFilesToSyncTest(t *testing.T) *bundle.Bundle {
BundleRoot: vfs.MustNew(dir),
SyncRootPath: dir,
SyncRoot: vfs.MustNew(dir),
WorktreeRoot: vfs.MustNew(dir),
Config: config.Root{
Bundle: config.Bundle{
Target: "default",

View File

@ -28,10 +28,11 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp
}
opts := &sync.SyncOptions{
LocalRoot: rb.SyncRoot(),
Paths: rb.Config().Sync.Paths,
Include: includes,
Exclude: rb.Config().Sync.Exclude,
WorktreeRoot: rb.WorktreeRoot(),
LocalRoot: rb.SyncRoot(),
Paths: rb.Config().Sync.Paths,
Include: includes,
Exclude: rb.Config().Sync.Exclude,
RemotePath: rb.Config().Workspace.FilePath,
Host: rb.WorkspaceClient().Config.Host,

View File

@ -12,6 +12,7 @@ import (
"github.com/databricks/cli/bundle/deploy/files"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/sync"
"github.com/databricks/cli/libs/vfs"
"github.com/spf13/cobra"
@ -37,6 +38,7 @@ func (f *syncFlags) syncOptionsFromBundle(cmd *cobra.Command, args []string, b *
opts.Full = f.full
opts.PollInterval = f.interval
opts.WorktreeRoot = b.WorktreeRoot
return opts, nil
}
@ -60,11 +62,21 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn
}
}
ctx := cmd.Context()
client := root.WorkspaceClient(ctx)
localRoot := vfs.MustNew(args[0])
info, err := git.FetchRepositoryInfo(ctx, localRoot, client)
if err != nil {
return nil, err
}
opts := sync.SyncOptions{
LocalRoot: vfs.MustNew(args[0]),
Paths: []string{"."},
Include: nil,
Exclude: nil,
WorktreeRoot: info.WorktreeRoot,
LocalRoot: localRoot,
Paths: []string{"."},
Include: nil,
Exclude: nil,
RemotePath: args[1],
Full: f.full,
@ -75,7 +87,7 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn
// The sync code will automatically create this directory if it doesn't
// exist and add it to the `.gitignore` file in the root.
SnapshotBasePath: filepath.Join(args[0], ".databricks"),
WorkspaceClient: root.WorkspaceClient(cmd.Context()),
WorkspaceClient: client,
OutputHandler: outputHandler,
}

View File

@ -13,10 +13,10 @@ type FileSet struct {
view *View
}
// NewFileSet returns [FileSet] for the Git repository located at `root`.
func NewFileSet(root vfs.Path, paths ...[]string) (*FileSet, error) {
// NewFileSet returns [FileSet] for the directory `root` which is contained within Git repository located at `worktreeRoot`.
func NewFileSet(worktreeRoot, root vfs.Path, paths ...[]string) (*FileSet, error) {
fs := fileset.New(root, paths...)
v, err := NewView(root)
v, err := NewView(worktreeRoot, root)
if err != nil {
return nil, err
}
@ -27,6 +27,10 @@ func NewFileSet(root vfs.Path, paths ...[]string) (*FileSet, error) {
}, nil
}
// NewFileSetAtRoot returns [FileSet] for the directory `root`, passing `root`
// to NewFileSet as both the worktree root and the directory to list.
func NewFileSetAtRoot(root vfs.Path, paths ...[]string) (*FileSet, error) {
return NewFileSet(root, root, paths...)
}
func (f *FileSet) IgnoreFile(file string) (bool, error) {
return f.view.IgnoreFile(file)
}

View File

@ -13,7 +13,7 @@ import (
)
func testFileSetAll(t *testing.T, root string) {
fileSet, err := NewFileSet(vfs.MustNew(root))
fileSet, err := NewFileSetAtRoot(vfs.MustNew(root))
require.NoError(t, err)
files, err := fileSet.Files()
require.NoError(t, err)
@ -35,7 +35,7 @@ func TestFileSetNonCleanRoot(t *testing.T) {
// Test what happens if the root directory can be simplified.
// Path simplification is done by most filepath functions.
// This should yield the same result as above test.
fileSet, err := NewFileSet(vfs.MustNew("./testdata/../testdata"))
fileSet, err := NewFileSetAtRoot(vfs.MustNew("./testdata/../testdata"))
require.NoError(t, err)
files, err := fileSet.Files()
require.NoError(t, err)
@ -44,7 +44,7 @@ func TestFileSetNonCleanRoot(t *testing.T) {
func TestFileSetAddsCacheDirToGitIgnore(t *testing.T) {
projectDir := t.TempDir()
fileSet, err := NewFileSet(vfs.MustNew(projectDir))
fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
fileSet.EnsureValidGitIgnoreExists()
@ -59,7 +59,7 @@ func TestFileSetDoesNotCacheDirToGitIgnoreIfAlreadyPresent(t *testing.T) {
projectDir := t.TempDir()
gitIgnorePath := filepath.Join(projectDir, ".gitignore")
fileSet, err := NewFileSet(vfs.MustNew(projectDir))
fileSet, err := NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
err = os.WriteFile(gitIgnorePath, []byte(".databricks"), 0o644)
require.NoError(t, err)

124
libs/git/info.go Normal file
View File

@ -0,0 +1,124 @@
package git
import (
"context"
"errors"
"io/fs"
"net/http"
"strings"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/vfs"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/client"
)
// GitRepositoryInfo is the git metadata returned by the FetchRepositoryInfo
// family of functions. Fields may be left at their zero value when the
// corresponding piece of information could not be determined.
type GitRepositoryInfo struct {
// OriginURL is taken from the API's `url` field or from Repository.OriginUrl.
OriginURL string
// LatestCommit is taken from the API's `head_commit_id` field or from Repository.LatestCommit.
LatestCommit string
// CurrentBranch is taken from the API's `branch` field or from Repository.CurrentBranch.
CurrentBranch string
// WorktreeRoot is the root directory of the checkout; the fetch functions
// fall back to the queried path when no repository root is found.
WorktreeRoot vfs.Path
}
// gitInfo mirrors the `git_info` object decoded from the
// /api/2.0/workspace/get-status response when `return_git_info=true` is sent.
type gitInfo struct {
Branch string `json:"branch"`
HeadCommitID string `json:"head_commit_id"`
// Path is used as the worktree root after being normalized by fixResponsePath.
Path string `json:"path"`
URL string `json:"url"`
}
// response is the subset of the get-status response body that is decoded here.
type response struct {
// GitInfo is nil when the response carries no `git_info` object.
GitInfo *gitInfo `json:"git_info,omitempty"`
}
// FetchRepositoryInfo collects git metadata for the checkout that contains path.
//
// If path lies under /Workspace/ and dbr.RunsOnRuntime reports true for ctx,
// the metadata is requested from the workspace API through w (there is no
// .git directory to read in that file system). In every other case it is
// read from the .git directory on disk.
func FetchRepositoryInfo(ctx context.Context, path vfs.Path, w *databricks.WorkspaceClient) (GitRepositoryInfo, error) {
	useAPI := strings.HasPrefix(path.Native(), "/Workspace/") && dbr.RunsOnRuntime(ctx)
	if !useAPI {
		return FetchRepositoryInfoDotGit(ctx, path)
	}
	return FetchRepositoryInfoAPI(ctx, path, w)
}
// FetchRepositoryInfoAPI asks the workspace get-status endpoint for the git
// metadata of path, using an API client built from w.Config.
//
// An error is returned when the client cannot be created or the request
// fails. When the response has no git info, a warning is logged and the
// result only carries path as the worktree root.
func FetchRepositoryInfoAPI(ctx context.Context, path vfs.Path, w *databricks.WorkspaceClient) (GitRepositoryInfo, error) {
	const apiEndpoint = "/api/2.0/workspace/get-status"

	apiClient, err := client.New(w.Config)
	if err != nil {
		return GitRepositoryInfo{}, err
	}

	query := map[string]string{
		"path":            path.Native(),
		"return_git_info": "true",
	}

	var status response
	if err := apiClient.Do(ctx, http.MethodGet, apiEndpoint, nil, query, &status); err != nil {
		return GitRepositoryInfo{}, err
	}

	// No git info in the response: fall back to the queried path as the root.
	if status.GitInfo == nil {
		log.Warnf(ctx, "Failed to load git info from %s", apiEndpoint)
		return GitRepositoryInfo{
			WorktreeRoot: path,
		}, nil
	}

	// The reported path is normalized before it becomes the worktree root.
	worktree := vfs.MustNew(fixResponsePath(status.GitInfo.Path))
	return GitRepositoryInfo{
		OriginURL:     status.GitInfo.URL,
		LatestCommit:  status.GitInfo.HeadCommitID,
		CurrentBranch: status.GitInfo.Branch,
		WorktreeRoot:  worktree,
	}, nil
}
// fixResponsePath makes sure the path reported by the API is rooted at
// /Workspace/.
//
// Paths that already start with "/Workspace/" are returned unchanged. For any
// other path, leading slashes are dropped before the prefix is added, so that
// "/Users/x/repo" becomes "/Workspace/Users/x/repo" rather than
// "/Workspace//Users/x/repo".
func fixResponsePath(path string) string {
	if strings.HasPrefix(path, "/Workspace/") {
		return path
	}
	return "/Workspace/" + strings.TrimLeft(path, "/")
}
// FetchRepositoryInfoDotGit reads git metadata from the .git directory of the
// checkout that contains path.
//
// The worktree root is the closest ancestor of path (or path itself) that
// contains GitDirectoryName; when none exists, path is used as the root.
//
// An error is returned only when the lookup of the root fails for a reason
// other than "not found", or when the Repository cannot be constructed.
// Failing to read the current branch or the latest commit is not fatal: a
// warning is logged, the affected field stays empty, and the worktree root
// and origin URL are still returned so callers never receive a zero-valued
// result together with a nil error.
func FetchRepositoryInfoDotGit(ctx context.Context, path vfs.Path) (GitRepositoryInfo, error) {
	rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName)
	if err != nil {
		if !errors.Is(err, fs.ErrNotExist) {
			return GitRepositoryInfo{}, err
		}
		// Cannot find `.git` directory.
		// Treat the specified path as a potential repository root checkout.
		rootDir = path
	}

	repo, err := NewRepository(rootDir)
	if err != nil {
		return GitRepositoryInfo{}, err
	}

	info := GitRepositoryInfo{
		OriginURL:    repo.OriginUrl(),
		WorktreeRoot: rootDir,
	}

	// Branch and commit are best effort; keep whatever else was collected.
	if branch, err := repo.CurrentBranch(); err != nil {
		log.Warnf(ctx, "failed to load current branch: %s", err)
	} else {
		info.CurrentBranch = branch
	}

	if commit, err := repo.LatestCommit(); err != nil {
		log.Warnf(ctx, "failed to load latest commit: %s", err)
	} else {
		info.LatestCommit = commit
	}

	return info, nil
}

View File

@ -1,9 +1,7 @@
package git
import (
"errors"
"fmt"
"io/fs"
"net/url"
"path"
"path/filepath"
@ -204,17 +202,7 @@ func (r *Repository) Ignore(relPath string) (bool, error) {
return false, nil
}
func NewRepository(path vfs.Path) (*Repository, error) {
rootDir, err := vfs.FindLeafInTree(path, GitDirectoryName)
if err != nil {
if !errors.Is(err, fs.ErrNotExist) {
return nil, err
}
// Cannot find `.git` directory.
// Treat the specified path as a potential repository root checkout.
rootDir = path
}
func NewRepository(rootDir vfs.Path) (*Repository, error) {
// Derive $GIT_DIR and $GIT_COMMON_DIR paths if this is a real repository.
// If it isn't a real repository, they'll point to the (non-existent) `.git` directory.
gitDir, gitCommonDir, err := resolveGitDirs(rootDir)

View File

@ -72,8 +72,8 @@ func (v *View) IgnoreDirectory(dir string) (bool, error) {
return v.Ignore(dir + "/")
}
func NewView(root vfs.Path) (*View, error) {
repo, err := NewRepository(root)
func NewView(worktreeRoot, root vfs.Path) (*View, error) {
repo, err := NewRepository(worktreeRoot)
if err != nil {
return nil, err
}
@ -96,6 +96,10 @@ func NewView(root vfs.Path) (*View, error) {
}, nil
}
// NewViewAtRoot returns a [View] for `root`, passing `root` to NewView as
// both the worktree root and the directory being viewed.
func NewViewAtRoot(root vfs.Path) (*View, error) {
return NewView(root, root)
}
func (v *View) EnsureValidGitIgnoreExists() error {
ign, err := v.IgnoreDirectory(".databricks")
if err != nil {

View File

@ -90,19 +90,19 @@ func testViewAtRoot(t *testing.T, tv testView) {
}
func TestViewRootInBricksRepo(t *testing.T) {
v, err := NewView(vfs.MustNew("./testdata"))
v, err := NewViewAtRoot(vfs.MustNew("./testdata"))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
func TestViewRootInTempRepo(t *testing.T) {
v, err := NewView(vfs.MustNew(createFakeRepo(t, "testdata")))
v, err := NewViewAtRoot(vfs.MustNew(createFakeRepo(t, "testdata")))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
func TestViewRootInTempDir(t *testing.T) {
v, err := NewView(vfs.MustNew(copyTestdata(t, "testdata")))
v, err := NewViewAtRoot(vfs.MustNew(copyTestdata(t, "testdata")))
require.NoError(t, err)
testViewAtRoot(t, testView{t, v})
}
@ -125,20 +125,21 @@ func testViewAtA(t *testing.T, tv testView) {
}
func TestViewAInBricksRepo(t *testing.T) {
v, err := NewView(vfs.MustNew("./testdata/a"))
v, err := NewView(vfs.MustNew("."), vfs.MustNew("./testdata/a"))
require.NoError(t, err)
testViewAtA(t, testView{t, v})
}
func TestViewAInTempRepo(t *testing.T) {
v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a")))
repo := createFakeRepo(t, "testdata")
v, err := NewView(vfs.MustNew(repo), vfs.MustNew(filepath.Join(repo, "a")))
require.NoError(t, err)
testViewAtA(t, testView{t, v})
}
func TestViewAInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a")))
v, err := NewViewAtRoot(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a")))
require.NoError(t, err)
tv := testView{t, v}
@ -175,20 +176,21 @@ func testViewAtAB(t *testing.T, tv testView) {
}
func TestViewABInBricksRepo(t *testing.T) {
v, err := NewView(vfs.MustNew("./testdata/a/b"))
v, err := NewView(vfs.MustNew("."), vfs.MustNew("./testdata/a/b"))
require.NoError(t, err)
testViewAtAB(t, testView{t, v})
}
func TestViewABInTempRepo(t *testing.T) {
v, err := NewView(vfs.MustNew(filepath.Join(createFakeRepo(t, "testdata"), "a", "b")))
repo := createFakeRepo(t, "testdata")
v, err := NewView(vfs.MustNew(repo), vfs.MustNew(filepath.Join(repo, "a", "b")))
require.NoError(t, err)
testViewAtAB(t, testView{t, v})
}
func TestViewABInTempDir(t *testing.T) {
// Since this is not a fake repo it should not traverse up the tree.
v, err := NewView(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b")))
v, err := NewViewAtRoot(vfs.MustNew(filepath.Join(copyTestdata(t, "testdata"), "a", "b")))
tv := testView{t, v}
require.NoError(t, err)
@ -215,7 +217,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredAtRoot(t *testing.T)
// Since root .gitignore already has .databricks, there should be no edits
// to root .gitignore
v, err := NewView(vfs.MustNew(repoPath))
v, err := NewViewAtRoot(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -235,7 +237,7 @@ func TestViewDoesNotChangeGitignoreIfCacheDirAlreadyIgnoredInSubdir(t *testing.T
// Since root .gitignore already has .databricks, there should be no edits
// to a/.gitignore
v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a")))
v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a")))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -253,7 +255,7 @@ func TestViewAddsGitignoreWithCacheDir(t *testing.T) {
assert.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to root .gitignore
v, err := NewView(vfs.MustNew(repoPath))
v, err := NewViewAtRoot(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -271,7 +273,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) {
require.NoError(t, err)
// Since root .gitignore was deleted, new view adds .databricks to a/.gitignore
v, err := NewView(vfs.MustNew(filepath.Join(repoPath, "a")))
v, err := NewView(vfs.MustNew(repoPath), vfs.MustNew(filepath.Join(repoPath, "a")))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()
@ -288,7 +290,7 @@ func TestViewAddsGitignoreWithCacheDirAtSubdir(t *testing.T) {
func TestViewAlwaysIgnoresCacheDir(t *testing.T) {
repoPath := createFakeRepo(t, "testdata")
v, err := NewView(vfs.MustNew(repoPath))
v, err := NewViewAtRoot(vfs.MustNew(repoPath))
require.NoError(t, err)
err = v.EnsureValidGitIgnoreExists()

View File

@ -30,7 +30,7 @@ func TestDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -94,7 +94,7 @@ func TestSymlinkDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -125,7 +125,7 @@ func TestFolderDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -170,7 +170,7 @@ func TestPythonNotebookDiff(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -245,7 +245,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{
@ -282,7 +282,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) {
// Create temp project dir
projectDir := t.TempDir()
fileSet, err := git.NewFileSet(vfs.MustNew(projectDir))
fileSet, err := git.NewFileSetAtRoot(vfs.MustNew(projectDir))
require.NoError(t, err)
state := Snapshot{
SnapshotState: &SnapshotState{

View File

@ -19,10 +19,11 @@ import (
type OutputHandler func(context.Context, <-chan Event)
type SyncOptions struct {
LocalRoot vfs.Path
Paths []string
Include []string
Exclude []string
WorktreeRoot vfs.Path
LocalRoot vfs.Path
Paths []string
Include []string
Exclude []string
RemotePath string
@ -62,7 +63,7 @@ type Sync struct {
// New initializes and returns a new [Sync] instance.
func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
fileSet, err := git.NewFileSet(opts.LocalRoot, opts.Paths)
fileSet, err := git.NewFileSet(opts.WorktreeRoot, opts.LocalRoot, opts.Paths)
if err != nil {
return nil, err
}

View File

@ -37,7 +37,7 @@ func TestGetFileSet(t *testing.T) {
dir := setupFiles(t)
root := vfs.MustNew(dir)
fileSet, err := git.NewFileSet(root)
fileSet, err := git.NewFileSetAtRoot(root)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()
@ -103,7 +103,7 @@ func TestRecursiveExclude(t *testing.T) {
dir := setupFiles(t)
root := vfs.MustNew(dir)
fileSet, err := git.NewFileSet(root)
fileSet, err := git.NewFileSetAtRoot(root)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()
@ -133,7 +133,7 @@ func TestNegateExclude(t *testing.T) {
dir := setupFiles(t)
root := vfs.MustNew(dir)
fileSet, err := git.NewFileSet(root)
fileSet, err := git.NewFileSetAtRoot(root)
require.NoError(t, err)
err = fileSet.EnsureValidGitIgnoreExists()