diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index c38013e2..36994076 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -24,4 +24,8 @@ type Bundle struct { // Lock configures locking behavior on deployment. Lock Lock `json:"lock" bundle:"readonly"` + + // Contains Git information like current commit, current branch and + // origin url. Automatically loaded by reading .git directory if not specified + Git Git `json:"git"` } diff --git a/bundle/config/git.go b/bundle/config/git.go new file mode 100644 index 00000000..7ada8dfb --- /dev/null +++ b/bundle/config/git.go @@ -0,0 +1,7 @@ +package config + +type Git struct { + Branch string `json:"branch,omitempty"` + OriginURL string `json:"origin_url,omitempty"` + Commit string `json:"commit,omitempty" bundle:"readonly"` +} diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go new file mode 100644 index 00000000..269cd1f5 --- /dev/null +++ b/bundle/config/mutator/load_git_details.go @@ -0,0 +1,51 @@ +package mutator + +import ( + "context" + + "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/libs/git" + "github.com/databricks/bricks/libs/log" +) + +type loadGitDetails struct{} + +func LoadGitDetails() *loadGitDetails { + return &loadGitDetails{} +} + +func (m *loadGitDetails) Name() string { + return "LoadGitDetails" +} + +func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) { + // Load relevant git repository + repo, err := git.NewRepository(b.Config.Path) + if err != nil { + return nil, err + } + // load branch name if undefined + if b.Config.Bundle.Git.Branch == "" { + branch, err := repo.CurrentBranch() + if err != nil { + log.Warnf(ctx, "failed to load current branch: %s", err) + } else { + b.Config.Bundle.Git.Branch = branch + } + } + // load commit hash if undefined + if b.Config.Bundle.Git.Commit == "" { + commit, err := repo.LatestCommit() + if err != nil { + 
log.Warnf(ctx, "failed to load latest commit: %s", err) + } else { + b.Config.Bundle.Git.Commit = commit + } + } + // load origin url if undefined + if b.Config.Bundle.Git.OriginURL == "" { + remoteUrl := repo.OriginUrl() + b.Config.Bundle.Git.OriginURL = remoteUrl + } + return nil, nil +} diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 99e1bffe..b24a34ba 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -9,6 +9,7 @@ func DefaultMutators() []bundle.Mutator { DefineDefaultInclude(), ProcessRootIncludes(), DefineDefaultEnvironment(), + LoadGitDetails(), } } diff --git a/bundle/tests/autoload_git/bundle.yml b/bundle/tests/autoload_git/bundle.yml new file mode 100644 index 00000000..d0e1de60 --- /dev/null +++ b/bundle/tests/autoload_git/bundle.yml @@ -0,0 +1,4 @@ +bundle: + name: autoload git config test + git: + branch: foo diff --git a/bundle/tests/autoload_git_test.go b/bundle/tests/autoload_git_test.go new file mode 100644 index 00000000..dd456a5c --- /dev/null +++ b/bundle/tests/autoload_git_test.go @@ -0,0 +1,15 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGitConfig(t *testing.T) { + b := load(t, "./autoload_git") + assert.Equal(t, "foo", b.Config.Bundle.Git.Branch) + sshUrl := "git@github.com:databricks/bricks.git" + httpsUrl := "https://github.com/databricks/bricks" + assert.Contains(t, []string{sshUrl, httpsUrl}, b.Config.Bundle.Git.OriginURL) +} diff --git a/libs/git/reference.go b/libs/git/reference.go new file mode 100644 index 00000000..66bd5403 --- /dev/null +++ b/libs/git/reference.go @@ -0,0 +1,96 @@ +package git + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" +) + +type ReferenceType string + +var ErrNotAReferencePointer = fmt.Errorf("HEAD does not point to another reference") +var ErrNotABranch = fmt.Errorf("HEAD is not a reference to a git branch") + +const ( + // pointer to a secondary reference 
file path containing sha-1 object ID. + // eg: `ref: refs/heads/my-branch-name` + ReferenceTypePointer = ReferenceType("pointer") + // A hexadecimal encoded SHA1 hash + ReferenceTypeSHA1 = ReferenceType("sha-1") +) + +// relevant documentation about git references: +// https://git-scm.com/book/en/v2/Git-Internals-Git-References +type Reference struct { + Type ReferenceType + Content string +} + +const ReferencePrefix = "ref: " +const HeadPathPrefix = "refs/heads/" + +// asserts if a string is a 40 character hexadecimal encoded string +func isSHA1(s string) bool { + re := regexp.MustCompile("^[0-9a-f]{40}$") + return re.MatchString(s) +} + +func LoadReferenceFile(path string) (*Reference, error) { + // read reference file content + b, err := os.ReadFile(path) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, err + } + + // trim new line characters + content := strings.TrimRight(string(b), "\r\n") + + // determine HEAD type + var refType ReferenceType + switch { + case strings.HasPrefix(content, ReferencePrefix): + refType = ReferenceTypePointer + case isSHA1(content): + refType = ReferenceTypeSHA1 + default: + return nil, fmt.Errorf("unknown format for git HEAD: %s", content) + } + + return &Reference{ + Type: refType, + Content: content, + }, nil +} + +// resolves the path to the secondary reference file pointed to. eg: if the file +// contents are `ref: a/b/c`, then this function returns `a/b/c` +func (ref *Reference) ResolvePath() (string, error) { + if ref.Type != ReferenceTypePointer { + return "", ErrNotAReferencePointer + } + refPath := strings.TrimPrefix(ref.Content, ReferencePrefix) + return filepath.FromSlash(refPath), nil +} + +// resolves the name of the current branch from the reference file content. 
For example +// `ref: refs/heads/my-branch` returns `my-branch` +func (ref *Reference) CurrentBranch() (string, error) { + branchRefPath, err := ref.ResolvePath() + if err == ErrNotAReferencePointer { + return "", ErrNotABranch + } + if err != nil { + return "", err + } + // normalize branch ref path to work across different operating systems + branchRefPath = filepath.ToSlash(branchRefPath) + if !strings.HasPrefix(branchRefPath, HeadPathPrefix) { + return "", fmt.Errorf("reference path %s does not have expected prefix %s", branchRefPath, HeadPathPrefix) + } + return strings.TrimPrefix(branchRefPath, HeadPathPrefix), nil +} diff --git a/libs/git/reference_test.go b/libs/git/reference_test.go new file mode 100644 index 00000000..1b08e989 --- /dev/null +++ b/libs/git/reference_test.go @@ -0,0 +1,100 @@ +package git + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReferenceReferencePathForObjectID(t *testing.T) { + ref := &Reference{ + Type: ReferenceTypeSHA1, + Content: strings.Repeat("a", 40), + } + _, err := ref.ResolvePath() + assert.ErrorIs(t, err, ErrNotAReferencePointer) +} + +func TestReferenceCurrentBranchForObjectID(t *testing.T) { + ref := &Reference{ + Type: ReferenceTypeSHA1, + Content: strings.Repeat("a", 40), + } + _, err := ref.CurrentBranch() + assert.ErrorIs(t, err, ErrNotABranch) +} + +func TestReferenceCurrentBranchForReference(t *testing.T) { + ref := &Reference{ + Type: ReferenceTypePointer, + Content: `ref: refs/heads/my-branch`, + } + branch, err := ref.CurrentBranch() + assert.NoError(t, err) + assert.Equal(t, "my-branch", branch) +} + +func TestReferenceReferencePathForReference(t *testing.T) { + ref := &Reference{ + Type: ReferenceTypePointer, + Content: `ref: refs/heads/my-branch`, + } + path, err := ref.ResolvePath() + assert.NoError(t, err) + assert.Equal(t, filepath.FromSlash("refs/heads/my-branch"), path) +} + +func 
TestReferenceLoadingForObjectID(t *testing.T) { + tmp := t.TempDir() + f, err := os.Create(filepath.Join(tmp, "HEAD")) + require.NoError(t, err) + defer f.Close() + f.WriteString(strings.Repeat("e", 40) + "\r\n") + + ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD")) + assert.NoError(t, err) + assert.Equal(t, ReferenceTypeSHA1, ref.Type) + assert.Equal(t, strings.Repeat("e", 40), ref.Content) +} + +func TestReferenceLoadingForReference(t *testing.T) { + tmp := t.TempDir() + f, err := os.OpenFile(filepath.Join(tmp, "HEAD"), os.O_CREATE|os.O_WRONLY, os.ModePerm) + require.NoError(t, err) + defer f.Close() + f.WriteString("ref: refs/heads/foo\n") + + ref, err := LoadReferenceFile(filepath.Join(tmp, "HEAD")) + assert.NoError(t, err) + assert.Equal(t, ReferenceTypePointer, ref.Type) + assert.Equal(t, "ref: refs/heads/foo", ref.Content) +} + +func TestReferenceLoadingFailsForInvalidContent(t *testing.T) { + tmp := t.TempDir() + f, err := os.OpenFile(filepath.Join(tmp, "HEAD"), os.O_CREATE|os.O_WRONLY, os.ModePerm) + require.NoError(t, err) + defer f.Close() + f.WriteString("abc") + + _, err = LoadReferenceFile(filepath.Join(tmp, "HEAD")) + assert.ErrorContains(t, err, "unknown format for git HEAD") +} + +func TestReferenceIsSha1(t *testing.T) { + a := strings.Repeat("0", 40) + b := strings.Repeat("f", 40) + c := strings.Repeat("0", 39) + d := strings.Repeat("F", 40) + e := strings.Repeat("0", 41) + + assert.True(t, isSHA1(a)) + assert.True(t, isSHA1(b)) + assert.False(t, isSHA1(c)) + assert.False(t, isSHA1(d)) + assert.False(t, isSHA1(e)) +} diff --git a/libs/git/repository.go b/libs/git/repository.go index 99f04977..5e16c0c3 100644 --- a/libs/git/repository.go +++ b/libs/git/repository.go @@ -30,6 +30,12 @@ type Repository struct { // Note: prefixes use the forward slash instead of the // OS-specific path separator. This matches Git convention. 
ignore map[string][]ignoreRules + + // config contains a merged view of the user specific and the repository + // specific git configuration loaded from .git/config files. + // + // Also see: https://git-scm.com/docs/git-config. + config *config } // Root returns the repository root. @@ -37,17 +43,75 @@ func (r *Repository) Root() string { return r.rootPath } +func (r *Repository) CurrentBranch() (string, error) { + // load .git/HEAD + ref, err := LoadReferenceFile(filepath.Join(r.rootPath, ".git", "HEAD")) + if err != nil { + return "", err + } + if ref == nil { + return "", nil + } + + // case: when a git object like commit,tag or remote branch is checked out + if ref.Type == ReferenceTypeSHA1 { + return "", nil + } + return ref.CurrentBranch() +} + +func (r *Repository) LatestCommit() (string, error) { + // load .git/HEAD + ref, err := LoadReferenceFile(filepath.Join(r.rootPath, ".git", "HEAD")) + if err != nil { + return "", err + } + if ref == nil { + // return empty string when head file does not exist + return "", nil + } + + // case: when a git object like commit,tag or remote branch is checked out + if ref.Type == ReferenceTypeSHA1 { + return ref.Content, nil + } + + // read reference from .git/HEAD + branchHeadPath, err := ref.ResolvePath() + if err != nil { + return "", err + } + branchHeadRef, err := LoadReferenceFile(filepath.Join(r.rootPath, ".git", branchHeadPath)) + if err != nil { + return "", err + } + if branchHeadRef == nil { + // return empty string when head file does not exist + return "", nil + } + if branchHeadRef.Type != ReferenceTypeSHA1 { + return "", fmt.Errorf("git reference at %s was expected to be a SHA-1 commit id", branchHeadPath) + } + return branchHeadRef.Content, nil +} + +// return origin url if it's defined, otherwise an empty string +func (r *Repository) OriginUrl() string { + return r.config.variables["remote.origin.url"] +} + // loadConfig loads and combines user specific and repository specific configuration files. 
-func (r *Repository) loadConfig() (*config, error) { +func (r *Repository) loadConfig() error { config, err := globalGitConfig() if err != nil { - return nil, fmt.Errorf("unable to load user specific gitconfig: %w", err) + return fmt.Errorf("unable to load user specific gitconfig: %w", err) } err = config.loadFile(filepath.Join(r.rootPath, ".git/config")) if err != nil { - return nil, fmt.Errorf("unable to load repository specific gitconfig: %w", err) + return fmt.Errorf("unable to load repository specific gitconfig: %w", err) } - return config, nil + r.config = config + return nil } // newIgnoreFile constructs a new [ignoreRules] implementation backed by @@ -139,13 +203,13 @@ func NewRepository(path string) (*Repository, error) { ignore: make(map[string][]ignoreRules), } - config, err := repo.loadConfig() + err = repo.loadConfig() if err != nil { // Error doesn't need to be rewrapped. return nil, err } - coreExcludesPath, err := config.coreExcludesFile() + coreExcludesPath, err := repo.config.coreExcludesFile() if err != nil { return nil, fmt.Errorf("unable to access core excludes file: %w", err) } diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go index 34fbf928..fb0e3808 100644 --- a/libs/git/repository_test.go +++ b/libs/git/repository_test.go @@ -1,7 +1,10 @@ package git import ( + "fmt" + "os" "path/filepath" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -14,12 +17,115 @@ type testRepository struct { r *Repository } -func (r *testRepository) Ignore(relPath string) bool { - ign, err := r.r.Ignore(relPath) - require.NoError(r.t, err) +func newTestRepository(t *testing.T) *testRepository { + tmp := t.TempDir() + err := os.Mkdir(filepath.Join(tmp, ".git"), os.ModePerm) + require.NoError(t, err) + + f1, err := os.Create(filepath.Join(tmp, ".git", "config")) + require.NoError(t, err) + defer f1.Close() + + f1.WriteString( + `[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true + 
ignorecase = true + precomposeunicode = true +`) + + f2, err := os.Create(filepath.Join(tmp, ".git", "HEAD")) + require.NoError(t, err) + defer f2.Close() + + _, err = f2.WriteString(`ref: refs/heads/main`) + require.NoError(t, err) + + repo, err := NewRepository(tmp) + require.NoError(t, err) + + return &testRepository{ + t: t, + r: repo, + } +} + +func (testRepo *testRepository) checkoutCommit(commitId string) { + f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) + require.NoError(testRepo.t, err) + defer f.Close() + + _, err = f.WriteString(commitId) + require.NoError(testRepo.t, err) +} + +func (testRepo *testRepository) addBranch(name string, latestCommit string) { + // create dir for branch head reference + branchDir := filepath.Join(testRepo.r.rootPath, ".git", "refs", "heads") + err := os.MkdirAll(branchDir, os.ModePerm) + require.NoError(testRepo.t, err) + + // create branch head reference file + f, err := os.OpenFile(filepath.Join(branchDir, name), os.O_CREATE|os.O_WRONLY, os.ModePerm) + require.NoError(testRepo.t, err) + defer f.Close() + + // enter the latest commit in the branch reference file + _, err = f.WriteString(latestCommit) + require.NoError(testRepo.t, err) +} + +func (testRepo *testRepository) checkoutBranch(name string) { + f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "HEAD"), os.O_WRONLY|os.O_TRUNC, os.ModePerm) + require.NoError(testRepo.t, err) + defer f.Close() + + _, err = f.WriteString("ref: refs/heads/" + name) + require.NoError(testRepo.t, err) +} + +// add remote origin url to test repo +func (testRepo *testRepository) addOriginUrl(url string) { + // open config in append mode + f, err := os.OpenFile(filepath.Join(testRepo.r.rootPath, ".git", "config"), os.O_WRONLY|os.O_APPEND, os.ModePerm) + require.NoError(testRepo.t, err) + defer f.Close() + + _, err = f.WriteString( + fmt.Sprintf(`[remote "origin"] + url = %s`, url)) + require.NoError(testRepo.t, 
err) + + // reload config to reflect the remote url + err = testRepo.r.loadConfig() + require.NoError(testRepo.t, err) +} + +func (testRepo *testRepository) Ignore(relPath string) bool { + ign, err := testRepo.r.Ignore(relPath) + require.NoError(testRepo.t, err) return ign } +func (testRepo *testRepository) assertBranch(expected string) { + branch, err := testRepo.r.CurrentBranch() + assert.NoError(testRepo.t, err) + assert.Equal(testRepo.t, expected, branch) +} + +func (testRepo *testRepository) assertCommit(expected string) { + commit, err := testRepo.r.LatestCommit() + assert.NoError(testRepo.t, err) + assert.Equal(testRepo.t, expected, commit) +} + +func (testRepo *testRepository) assertOriginUrl(expected string) { + originUrl := testRepo.r.OriginUrl() + assert.Equal(testRepo.t, expected, originUrl) +} + func TestRepository(t *testing.T) { // Load this repository as test. repo, err := NewRepository("../..") @@ -38,3 +144,65 @@ func TestRepository(t *testing.T) { // Check that ignores under testdata work. 
assert.True(t, tr.Ignore(filepath.Join("libs", "git", "testdata", "root.ignoreme"))) } + +func TestRepositoryGitConfigForEmptyRepo(t *testing.T) { + repo := newTestRepository(t) + repo.assertBranch("main") + repo.assertCommit("") + repo.assertOriginUrl("") +} + +func TestRepositoryGitConfig(t *testing.T) { + repo := newTestRepository(t) + repo.addBranch("foo", strings.Repeat("1", 40)) + repo.addBranch("bar", strings.Repeat("2", 40)) + repo.assertBranch("main") + repo.assertCommit("") + repo.assertOriginUrl("") + + repo.checkoutBranch("foo") + repo.assertBranch("foo") + repo.assertCommit(strings.Repeat("1", 40)) + repo.assertOriginUrl("") + + repo.addOriginUrl("https://www.foo.com/bar") + repo.assertBranch("foo") + repo.assertCommit(strings.Repeat("1", 40)) + repo.assertOriginUrl("https://www.foo.com/bar") + + repo.checkoutBranch("bar") + repo.assertBranch("bar") + repo.assertCommit(strings.Repeat("2", 40)) + repo.assertOriginUrl("https://www.foo.com/bar") + + repo.checkoutCommit(strings.Repeat("3", 40)) + repo.assertBranch("") + repo.assertCommit(strings.Repeat("3", 40)) + repo.assertOriginUrl("https://www.foo.com/bar") +} + +func TestRepositoryGitConfigForSshUrl(t *testing.T) { + repo := newTestRepository(t) + repo.addOriginUrl(`git@foo.com:databricks/bar.git`) + + repo.assertBranch("main") + repo.assertCommit("") + repo.assertOriginUrl("git@foo.com:databricks/bar.git") +} + +func TestRepositoryGitConfigWhenNotARepo(t *testing.T) { + tmp := t.TempDir() + repo, err := NewRepository(tmp) + require.NoError(t, err) + + branch, err := repo.CurrentBranch() + assert.NoError(t, err) + assert.Equal(t, "", branch) + + commit, err := repo.LatestCommit() + assert.NoError(t, err) + assert.Equal(t, "", commit) + + originUrl := repo.OriginUrl() + assert.Equal(t, "", originUrl) +} diff --git a/libs/git/utils.go b/libs/git/utils.go new file mode 100644 index 00000000..13ce2c9e --- /dev/null +++ b/libs/git/utils.go @@ -0,0 +1,28 @@ +package git + +import ( + "strings" + + giturls 
"github.com/whilp/git-urls" +) + +func ToHttpsUrl(url string) (string, error) { + originUrl, err := giturls.Parse(url) + if err != nil { + return "", err + } + if originUrl.Scheme == "https" { + return originUrl.String(), nil + } + // if current repo is checked out with a SSH key + if originUrl.Scheme != "https" { + originUrl.Scheme = "https" + } + // `git@` is not required for HTTPS + if originUrl.User != nil { + originUrl.User = nil + } + // Remove `.git` suffix, if present. + originUrl.Path = strings.TrimSuffix(originUrl.Path, ".git") + return originUrl.String(), nil +} diff --git a/libs/git/utils_test.go b/libs/git/utils_test.go new file mode 100644 index 00000000..52a912da --- /dev/null +++ b/libs/git/utils_test.go @@ -0,0 +1,13 @@ +package git + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestToHttpsUrlForSsh(t *testing.T) { + url, err := ToHttpsUrl("user@foo.com:org/repo-name.git") + assert.NoError(t, err) + assert.Equal(t, "https://foo.com/org/repo-name", url) +}