Add support for cloning repositories (#544)

## Changes
Adds support for cloning public and private GitHub repositories for
Databricks templates.

## Tests
Integration tests
This commit is contained in:
shreyas-goenka 2023-07-25 15:36:20 +02:00 committed by GitHub
parent 9a88fa602d
commit 8fdc0fec81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 169 additions and 0 deletions

View File

@ -0,0 +1,63 @@
package internal
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/databricks/cli/libs/git"
"github.com/stretchr/testify/assert"
)
// TestAccGitClone clones a repository by its full URL without specifying a
// reference, then checks the cloned content and that HEAD points at the "ide"
// branch, which is the repository's default.
func TestAccGitClone(t *testing.T) {
	// NOTE(review): presumably skips the test when CLOUD_ENV is unset — confirm
	// against GetEnvOrSkipTest.
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	tmpDir := t.TempDir()
	ctx := context.Background()

	// The reference is left empty, so no --branch flag is passed to git.
	err := git.Clone(ctx, "https://github.com/databricks/databricks-empty-ide-project.git", "", tmpDir)
	assert.NoError(t, err)

	// assert repo content
	b, err := os.ReadFile(filepath.Join(tmpDir, "README-IDE.md"))
	assert.NoError(t, err)
	assert.Contains(t, string(b), "This folder contains a project that was synchronized from an IDE.")

	// assert current branch is ide, ie default for the repo
	b, err = os.ReadFile(filepath.Join(tmpDir, ".git/HEAD"))
	assert.NoError(t, err)
	assert.Contains(t, string(b), "ide")
}
// TestAccGitCloneWithOnlyRepoNameOnAlternateBranch clones a repository that is
// identified only by its name, with an explicit reference, and checks the
// cloned content and that HEAD points at the requested reference.
func TestAccGitCloneWithOnlyRepoNameOnAlternateBranch(t *testing.T) {
	// NOTE(review): presumably skips the test when CLOUD_ENV is unset — confirm
	// against GetEnvOrSkipTest.
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	cloneDir := t.TempDir()
	background := context.Background()

	cloneErr := git.Clone(background, "notebook-best-practices", "dais-2022", cloneDir)
	assert.NoError(t, cloneErr)

	// The README of the checked-out reference must contain the expected text.
	readme, readmeErr := os.ReadFile(filepath.Join(cloneDir, "README.md"))
	assert.NoError(t, readmeErr)
	assert.Contains(t, string(readme), "Software engineering best practices for Databricks notebooks")

	// HEAD must point at the reference that was requested, i.e. dais-2022.
	head, headErr := os.ReadFile(filepath.Join(cloneDir, ".git/HEAD"))
	assert.NoError(t, headErr)
	assert.Contains(t, string(head), "dais-2022")
}
// TestAccGitCloneRepositoryDoesNotExist checks that cloning a repository name
// that does not exist fails, and that the error carries git's "not found"
// message for the resolved URL.
func TestAccGitCloneRepositoryDoesNotExist(t *testing.T) {
	// NOTE(review): presumably skips the test when CLOUD_ENV is unset — confirm
	// against GetEnvOrSkipTest.
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	tmpDir := t.TempDir()

	err := git.Clone(context.Background(), "doesnot-exist", "", tmpDir)

	// Only inspect the message when an error was actually returned; calling
	// err.Error() on a nil error would panic instead of failing the test.
	if assert.Error(t, err) {
		assert.Contains(t, err.Error(), `repository 'https://github.com/databricks/doesnot-exist/' not found`)
	}
}

72
libs/git/clone.go Normal file
View File

@ -0,0 +1,72 @@
package git
import (
"bytes"
"context"
"errors"
"fmt"
"os/exec"
"regexp"
"strings"
)
// githubRepoRegex matches input made up solely of word characters (ASCII
// letters, digits and "_"), "-" and ".". Input containing "/" does not match.
//
// source: https://stackoverflow.com/questions/59081778/rules-for-special-characters-in-github-repository-name
var githubRepoRegex = regexp.MustCompile(`^[\w-\.]+$`)

const (
	// githubUrl is the base URL prepended when only a repository name is given.
	githubUrl = "https://github.com"

	// databricksOrg is the GitHub organization used to resolve bare repository names.
	databricksOrg = "databricks"
)
// cloneOptions holds the inputs for a single `git clone` invocation.
type cloneOptions struct {
	// Reference is the branch or tag to clone. When empty, no --branch flag is
	// passed to git.
	Reference string

	// RepositoryUrl is the URL of the repository to clone.
	RepositoryUrl string

	// TargetPath is the local path to clone the repository at.
	TargetPath string
}

// args returns the command-line arguments for the git executable: a shallow
// clone (--depth=1) without tags of RepositoryUrl into TargetPath, followed by
// "--branch <Reference>" when a reference is set.
func (o cloneOptions) args() []string {
	cloneArgs := []string{"clone", o.RepositoryUrl, o.TargetPath, "--depth=1", "--no-tags"}
	if o.Reference == "" {
		return cloneArgs
	}
	return append(cloneArgs, "--branch", o.Reference)
}
// Clone runs `git clone` for the repository identified by url, placing the
// result at targetPath.
//
// When url consists only of word characters, "." and "-" (and therefore
// contains no "/"), it is treated as a bare repository name and resolved
// against the databricks GitHub organization. Any other value is handed to git
// unchanged. A non-empty reference (branch or tag) is passed to git via
// --branch.
//
// The returned error wraps exec.ErrNotFound when the git executable cannot be
// found. When git exits unsuccessfully, the error includes whatever git wrote
// to stderr.
func Clone(ctx context.Context, url, reference, targetPath string) error {
	// Expand a bare repository name into a full URL under the databricks
	// GitHub organization; leave everything else as provided.
	repositoryUrl := url
	if githubRepoRegex.MatchString(url) {
		repositoryUrl = strings.Join([]string{githubUrl, databricksOrg, url}, "/")
	}

	gitArgs := cloneOptions{
		Reference:     reference,
		RepositoryUrl: repositoryUrl,
		TargetPath:    targetPath,
	}.args()
	cmd := exec.CommandContext(ctx, "git", gitArgs...)

	// Capture stderr so it can be attached to the error if the clone fails.
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	// Start and Wait are kept separate because their failures are reported
	// differently: start-up errors are returned as-is (with an install hint
	// when git is missing), while clone failures carry git's stderr output.
	if startErr := cmd.Start(); startErr != nil {
		if errors.Is(startErr, exec.ErrNotFound) {
			return fmt.Errorf("please install git CLI to clone a repository: %w", startErr)
		}
		return startErr
	}

	if waitErr := cmd.Wait(); waitErr != nil {
		return fmt.Errorf("git clone failed: %w. %s", waitErr, stderr.String())
	}
	return nil
}

34
libs/git/clone_test.go Normal file
View File

@ -0,0 +1,34 @@
package git
import (
"context"
"os/exec"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGitCloneArgs(t *testing.T) {
// case: No branch / tag specified. In this case git clones the default branch
assert.Equal(t, []string{"clone", "abc", "/def", "--depth=1", "--no-tags"}, cloneOptions{
Reference: "",
RepositoryUrl: "abc",
TargetPath: "/def",
}.args())
// case: A branch is specified.
assert.Equal(t, []string{"clone", "abc", "/def", "--depth=1", "--no-tags", "--branch", "my-branch"}, cloneOptions{
Reference: "my-branch",
RepositoryUrl: "abc",
TargetPath: "/def",
}.args())
}
// TestGitCloneWithGitNotFound checks that Clone returns an error matching
// exec.ErrNotFound when the git executable cannot be located.
func TestGitCloneWithGitNotFound(t *testing.T) {
	// Blank out $PATH so the git CLI cannot be found by the clone function.
	t.Setenv("PATH", "")

	cloneDir := t.TempDir()
	cloneErr := Clone(context.Background(), "abc", "", cloneDir)

	assert.ErrorIs(t, cloneErr, exec.ErrNotFound)
}