From 8fdc0fec81854d1b7b9013d0b09326d1d8d8303f Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 25 Jul 2023 15:36:20 +0200 Subject: [PATCH] Add support for cloning repositories (#544) ## Changes Adds support for cloning public and private github repositories for databricks templates ## Tests Integration tests --- internal/git_clone_test.go | 63 +++++++++++++++++++++++++++++++++ libs/git/clone.go | 72 ++++++++++++++++++++++++++++++++++++++ libs/git/clone_test.go | 34 ++++++++++++++++++ 3 files changed, 169 insertions(+) create mode 100644 internal/git_clone_test.go create mode 100644 libs/git/clone.go create mode 100644 libs/git/clone_test.go diff --git a/internal/git_clone_test.go b/internal/git_clone_test.go new file mode 100644 index 00000000..b280ebc7 --- /dev/null +++ b/internal/git_clone_test.go @@ -0,0 +1,63 @@ +package internal + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/libs/git" + "github.com/stretchr/testify/assert" +) + +func TestAccGitClone(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + tmpDir := t.TempDir() + ctx := context.Background() + var err error + + err = git.Clone(ctx, "https://github.com/databricks/databricks-empty-ide-project.git", "", tmpDir) + assert.NoError(t, err) + + // assert repo content + assert.NoError(t, err) + b, err := os.ReadFile(filepath.Join(tmpDir, "README-IDE.md")) + assert.NoError(t, err) + assert.Contains(t, string(b), "This folder contains a project that was synchronized from an IDE.") + + // assert current branch is ide, ie default for the repo + b, err = os.ReadFile(filepath.Join(tmpDir, ".git/HEAD")) + assert.NoError(t, err) + assert.Contains(t, string(b), "ide") +} + +func TestAccGitCloneWithOnlyRepoNameOnAlternateBranch(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + tmpDir := t.TempDir() + ctx := context.Background() + var err error + + err = git.Clone(ctx, "notebook-best-practices", 
"dais-2022", tmpDir) + + // assert on repo content + assert.NoError(t, err) + b, err := os.ReadFile(filepath.Join(tmpDir, "README.md")) + assert.NoError(t, err) + assert.Contains(t, string(b), "Software engineering best practices for Databricks notebooks") + + // assert current branch is dais-2022, i.e. the requested reference + b, err = os.ReadFile(filepath.Join(tmpDir, ".git/HEAD")) + assert.NoError(t, err) + assert.Contains(t, string(b), "dais-2022") +} + +func TestAccGitCloneRepositoryDoesNotExist(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + tmpDir := t.TempDir() + + err := git.Clone(context.Background(), "doesnot-exist", "", tmpDir) + assert.Contains(t, err.Error(), `repository 'https://github.com/databricks/doesnot-exist/' not found`) +} diff --git a/libs/git/clone.go b/libs/git/clone.go new file mode 100644 index 00000000..ec663272 --- /dev/null +++ b/libs/git/clone.go @@ -0,0 +1,72 @@ +package git + +import ( + "bytes" + "context" + "errors" + "fmt" + "os/exec" + "regexp" + "strings" +) + +// source: https://stackoverflow.com/questions/59081778/rules-for-special-characters-in-github-repository-name +var githubRepoRegex = regexp.MustCompile(`^[\w-\.]+$`) + +const githubUrl = "https://github.com" +const databricksOrg = "databricks" + +type cloneOptions struct { + // Branch or tag to clone + Reference string + + // URL for the repository + RepositoryUrl string + + // Local path to clone repository at + TargetPath string +} + +func (opts cloneOptions) args() []string { + args := []string{"clone", opts.RepositoryUrl, opts.TargetPath, "--depth=1", "--no-tags"} + if opts.Reference != "" { + args = append(args, "--branch", opts.Reference) + } + return args +} + +func Clone(ctx context.Context, url, reference, targetPath string) error { + // We assume only the repository name has been specified if the input does not contain any + // `/` characters and the url is only made up of alphanumeric characters and + // ".", "_" and "-". 
This repository is resolved against the databricks github account. + fullUrl := url + if githubRepoRegex.MatchString(url) { + fullUrl = strings.Join([]string{githubUrl, databricksOrg, url}, "/") + } + + opts := cloneOptions{ + Reference: reference, + RepositoryUrl: fullUrl, + TargetPath: targetPath, + } + + cmd := exec.CommandContext(ctx, "git", opts.args()...) + var cmdErr bytes.Buffer + cmd.Stderr = &cmdErr + + // start git clone + err := cmd.Start() + if errors.Is(err, exec.ErrNotFound) { + return fmt.Errorf("please install git CLI to clone a repository: %w", err) + } + if err != nil { + return err + } + + // wait for git clone to complete + err = cmd.Wait() + if err != nil { + return fmt.Errorf("git clone failed: %w. %s", err, cmdErr.String()) + } + return nil +} diff --git a/libs/git/clone_test.go b/libs/git/clone_test.go new file mode 100644 index 00000000..8101178f --- /dev/null +++ b/libs/git/clone_test.go @@ -0,0 +1,34 @@ +package git + +import ( + "context" + "os/exec" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGitCloneArgs(t *testing.T) { + // case: No branch / tag specified. In this case git clones the default branch + assert.Equal(t, []string{"clone", "abc", "/def", "--depth=1", "--no-tags"}, cloneOptions{ + Reference: "", + RepositoryUrl: "abc", + TargetPath: "/def", + }.args()) + + // case: A branch is specified. + assert.Equal(t, []string{"clone", "abc", "/def", "--depth=1", "--no-tags", "--branch", "my-branch"}, cloneOptions{ + Reference: "my-branch", + RepositoryUrl: "abc", + TargetPath: "/def", + }.args()) +} + +func TestGitCloneWithGitNotFound(t *testing.T) { + // We set $PATH here so the git CLI cannot be found by the clone function + t.Setenv("PATH", "") + tmpDir := t.TempDir() + + err := Clone(context.Background(), "abc", "", tmpDir) + assert.ErrorIs(t, err, exec.ErrNotFound) +}