databricks-cli/internal/helpers.go

616 lines
16 KiB
Go

package internal
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"math/rand"
"net/http"
"os"
"path"
"path/filepath"
"reflect"
"strings"
"sync"
"testing"
"time"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/cmd"
_ "github.com/databricks/cli/cmd/version"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/filer"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/files"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/stretchr/testify/require"
_ "github.com/databricks/cli/cmd/workspace"
)
// charset is the alphabet that RandomName draws its random characters from.
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
// GetEnvOrSkipTest returns the value of the environment variable name.
// When the variable is unset or empty, the calling test is skipped instead.
func GetEnvOrSkipTest(t *testing.T, name string) string {
	if v := os.Getenv(name); v != "" {
		return v
	}
	// Skipf stops the calling test, so the return below is never reached.
	t.Skipf("Environment variable %s is missing", name)
	return ""
}
// RandomName returns a name made of 12 random characters drawn from charset,
// preceded by the concatenation of all given prefixes (if any),
// e.g. RandomName("tf-").
func RandomName(prefix ...string) string {
	const randLen = 12
	b := make([]byte, randLen)
	for i := range b {
		// Index over the whole alphabet. Bounding the index by randLen would
		// restrict the output to the first 12 characters of charset.
		b[i] = charset[rand.Intn(len(charset))]
	}
	return strings.Join(prefix, "") + string(b)
}
// cobraTestRunner runs the root command in the background on behalf of a test.
// The background goroutine is terminated on test completion by cancelling
// the command context.
type cobraTestRunner struct {
	*testing.T

	// args are the command-line arguments handed to the command.
	args []string

	// stdout and stderr accumulate everything read from the command's
	// output pipes.
	stdout, stderr bytes.Buffer

	// stdinR and stdinW are the two ends of the optional standard input pipe.
	stdinR *io.PipeReader
	stdinW *io.PipeWriter

	// ctx is the context the command context is derived from.
	ctx context.Context

	// Line-by-line output.
	// Background goroutines populate these channels by reading from the
	// stdout/stderr pipes.
	stdoutLines, stderrLines <-chan string

	// errch delivers the result of the command once it has finished.
	errch <-chan error
}
// consumeLines starts a goroutine that scans r line by line and forwards each
// line to the returned channel, which has room for 1000 lines.
//
// The goroutine is registered with wg. It stops when the scanner has no more
// lines or when ctx is done, and the channel is closed when it exits.
func consumeLines(ctx context.Context, wg *sync.WaitGroup, r io.Reader) <-chan string {
	lines := make(chan string, 1000)
	wg.Add(1)
	go func() {
		// Deferred calls run last-in first-out: wg.Done, then close(lines).
		defer close(lines)
		defer wg.Done()
		for sc := bufio.NewScanner(r); sc.Scan(); {
			select {
			case lines <- sc.Text():
			case <-ctx.Done():
				return
			}
		}
	}()
	return lines
}
// registerFlagCleanup snapshots the current value of every flag on the command
// selected by t.args and registers test cleanups that restore those values.
func (t *cobraTestRunner) registerFlagCleanup(c *cobra.Command) {
	// Resolve the command that is going to run. For `databricks fs cp`
	// that is the `cp` command.
	targetCmd, _, err := c.Find(t.args)
	switch {
	case err != nil && strings.HasPrefix(err.Error(), "unknown command"):
		// An unknown command is acceptable as long as a command was resolved.
		require.NotNil(t, targetCmd)
	default:
		require.NoError(t, err)
	}

	// Cobra only creates the default flags at execution time. Create them
	// now so that the loop below covers them as well.
	targetCmd.InitDefaultHelpFlag()
	targetCmd.InitDefaultVersionFlag()

	targetCmd.Flags().VisitAll(func(f *pflag.Flag) {
		current := reflect.ValueOf(f.Value)
		if current.Kind() == reflect.Ptr {
			current = current.Elem()
		}

		// Keep a copy of the value as it is right now ...
		snapshot := reflect.New(current.Type()).Elem()
		snapshot.Set(current)

		// ... and write it back once the test has completed.
		t.Cleanup(func() {
			current.Set(snapshot)
		})
	})
}
// WaitForTextPrinted waits, via [cobraTestRunner.Eventually], until text
// appears in the captured stdout. The buffer is polled every 50ms for at most
// timeout.
func (t *cobraTestRunner) WaitForTextPrinted(text string, timeout time.Duration) {
	printed := func() bool {
		return strings.Contains(t.stdout.String(), text)
	}
	t.Eventually(printed, timeout, 50*time.Millisecond)
}
// WaitForOutput waits until text appears in either the captured stdout or the
// captured stderr. The buffers are polled every 50ms and the test fails if the
// text has not shown up within timeout.
func (t *cobraTestRunner) WaitForOutput(text string, timeout time.Duration) {
	seen := func() bool {
		if strings.Contains(t.stdout.String(), text) {
			return true
		}
		return strings.Contains(t.stderr.String(), text)
	}
	require.Eventually(t.T, seen, timeout, 50*time.Millisecond)
}
// WithStdin creates a new in-memory pipe and stores its two ends on the
// runner as stdinR (read end) and stdinW (write end).
func (t *cobraTestRunner) WithStdin() {
	t.stdinR, t.stdinW = io.Pipe()
}
// CloseStdin closes the write end of the standard input pipe.
// It panics if no standard input pipe has been set on the runner.
func (t *cobraTestRunner) CloseStdin() {
	w := t.stdinW
	if w == nil {
		panic("no standard input configured")
	}
	w.Close()
}
// SendText writes text followed by a newline to the write end of the standard
// input pipe.
//
// It panics if no standard input pipe has been set on the runner, or if the
// write fails (for example because the pipe has already been closed), so that
// input which never reached the command does not go unnoticed.
func (t *cobraTestRunner) SendText(text string) {
	if t.stdinW == nil {
		panic("no standard input configured")
	}
	if _, err := t.stdinW.Write([]byte(text + "\n")); err != nil {
		panic(fmt.Errorf("writing to standard input: %w", err))
	}
}
// RunBackground starts the command with the runner's arguments in a
// background goroutine and returns without waiting for it to finish.
//
// The command's stdout and stderr are written to pipes. Everything read from
// those pipes is copied into t.stdout / t.stderr and is also made available
// line-by-line on t.stdoutLines / t.stderrLines. Once the command has
// finished, its output is logged, its result is sent on t.errch and the
// channel is closed. A test cleanup cancels the command context and waits for
// the goroutine to finish.
func (t *cobraTestRunner) RunBackground() {
var stdoutR, stderrR io.Reader
var stdoutW, stderrW io.WriteCloser
stdoutR, stdoutW = io.Pipe()
stderrR, stderrW = io.Pipe()
ctx := cmdio.NewContext(t.ctx, &cmdio.Logger{
Mode: flags.ModeAppend,
Reader: bufio.Reader{},
Writer: stderrW,
})
cli := cmd.New(ctx)
cli.SetOut(stdoutW)
cli.SetErr(stderrW)
cli.SetArgs(t.args)
// Standard input is only attached when a pipe has been set on the runner.
if t.stdinW != nil {
cli.SetIn(t.stdinR)
}
// Register cleanup function to restore flags to their original values
// once test has been executed. This is needed because flag values reside
// in a global singleton data-structure, and thus subsequent tests might
// otherwise interfere with each other
t.registerFlagCleanup(cli)
// errch is unbuffered: the goroutine below blocks on its send until the
// result is received.
errch := make(chan error)
ctx, cancel := context.WithCancel(ctx)
// Tee stdout/stderr to buffers.
stdoutR = io.TeeReader(stdoutR, &t.stdout)
stderrR = io.TeeReader(stderrR, &t.stderr)
// Consume stdout/stderr line-by-line.
var wg sync.WaitGroup
t.stdoutLines = consumeLines(ctx, &wg, stdoutR)
t.stderrLines = consumeLines(ctx, &wg, stderrR)
// Run command in background.
go func() {
err := root.Execute(ctx, cli)
if err != nil {
t.Logf("Error running command: %s", err)
}
// Close pipes to signal EOF.
stdoutW.Close()
stderrW.Close()
// Wait for the [consumeLines] routines to finish now that
// the pipes they're reading from have closed.
wg.Wait()
if t.stdout.Len() > 0 {
// Make a copy of the buffer such that it remains "unread".
scanner := bufio.NewScanner(bytes.NewBuffer(t.stdout.Bytes()))
for scanner.Scan() {
t.Logf("[databricks stdout]: %s", scanner.Text())
}
}
if t.stderr.Len() > 0 {
// Make a copy of the buffer such that it remains "unread".
scanner := bufio.NewScanner(bytes.NewBuffer(t.stderr.Bytes()))
for scanner.Scan() {
t.Logf("[databricks stderr]: %s", scanner.Text())
}
}
// Reset context on command for the next test.
// These commands are globals so we have to clean up to the best of our ability after each run.
// See https://github.com/spf13/cobra/blob/a6f198b635c4b18fff81930c40d464904e55b161/command.go#L1062-L1066
//lint:ignore SA1012 cobra sets the context and doesn't clear it
cli.SetContext(nil)
// Make caller aware of error.
errch <- err
close(errch)
}()
// Ensure command terminates upon test completion (success or failure).
t.Cleanup(func() {
// Signal termination of command.
cancel()
// Wait for goroutine to finish.
// This receives either the pending result or, if the result has already
// been received elsewhere, the zero value from the closed channel.
<-errch
})
t.errch = errch
}
// Run starts the command in the background, waits for its result and returns
// copies of the captured stdout and stderr buffers together with that result.
func (t *cobraTestRunner) Run() (bytes.Buffer, bytes.Buffer, error) {
	t.RunBackground()
	// Block until the background goroutine reports the command's result.
	runErr := <-t.errch
	return t.stdout, t.stderr, runErr
}
// Eventually is like [require.Eventually], but additionally fails the test as
// soon as a result arrives on the runner's error channel, i.e. when the
// command finishes before the condition has been satisfied.
//
// condition is evaluated once immediately and then again on every tick, with
// at most one evaluation in flight at a time. The test fails if the condition
// has not returned true within waitFor.
func (t *cobraTestRunner) Eventually(condition func() bool, waitFor time.Duration, tick time.Duration, msgAndArgs ...any) {
	results := make(chan bool, 1)
	check := func() { results <- condition() }

	deadline := time.NewTimer(waitFor)
	defer deadline.Stop()

	ticker := time.NewTicker(tick)
	defer ticker.Stop()

	// Kick off condition check immediately.
	go check()

	// nextTick is set to nil while a check is in flight so that ticks are
	// ignored until its result has been received.
	nextTick := ticker.C
	for {
		select {
		case err := <-t.errch:
			require.Fail(t, "Command failed", err)
			return
		case <-deadline.C:
			require.Fail(t, "Condition never satisfied", msgAndArgs...)
			return
		case <-nextTick:
			nextTick = nil
			go check()
		case satisfied := <-results:
			if satisfied {
				return
			}
			nextTick = ticker.C
		}
	}
}
// RunAndExpectOutput runs the command, requires it to succeed and requires its
// stdout, with surrounding whitespace trimmed, to equal the given heredoc
// after it has been passed through cmdio.Heredoc.
func (t *cobraTestRunner) RunAndExpectOutput(heredoc string) {
	stdout, _, err := t.Run()
	require.NoError(t, err)

	want := cmdio.Heredoc(heredoc)
	got := strings.TrimSpace(stdout.String())
	require.Equal(t, want, got)
}
// RunAndParseJSON runs the command, requires it to succeed and decodes its
// stdout as JSON into v.
//
// v must be a non-nil pointer to the value to populate. It is handed to
// json.Unmarshal as-is rather than by address, so that a non-pointer argument
// is reported as an error instead of being decoded into a local copy that the
// caller never sees.
func (t *cobraTestRunner) RunAndParseJSON(v any) {
	stdout, _, err := t.Run()
	require.NoError(t, err)
	err = json.Unmarshal(stdout.Bytes(), v)
	require.NoError(t, err)
}
// NewCobraTestRunner returns a runner for the given test and command-line
// arguments that uses context.Background() as its context.
func NewCobraTestRunner(t *testing.T, args ...string) *cobraTestRunner {
	return NewCobraTestRunnerWithContext(t, context.Background(), args...)
}
// NewCobraTestRunnerWithContext returns a runner for the given test and
// command-line arguments that uses ctx as its context.
func NewCobraTestRunnerWithContext(t *testing.T, ctx context.Context, args ...string) *cobraTestRunner {
	runner := new(cobraTestRunner)
	runner.T = t
	runner.ctx = ctx
	runner.args = args
	return runner
}
// RequireSuccessfulRun logs the arguments, runs the command with them and
// fails the test if the command returns an error. It returns the captured
// stdout and stderr.
func RequireSuccessfulRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer) {
	t.Logf("run args: [%s]", strings.Join(args, ", "))

	stdout, stderr, err := NewCobraTestRunner(t, args...).Run()
	require.NoError(t, err)
	return stdout, stderr
}
// RequireErrorRun runs the command with the given arguments and fails the test
// if the command does not return an error. It returns the captured stdout and
// stderr together with that error.
func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer, error) {
	stdout, stderr, err := NewCobraTestRunner(t, args...).Run()
	require.Error(t, err)
	return stdout, stderr, err
}
// writeFile creates a file called name inside a fresh temporary directory of
// the test, writes body to it and returns the path of the file.
//
// The test fails if the file cannot be written. os.WriteFile is used so that
// the file is always closed and an error on close is not lost.
func writeFile(t *testing.T, name string, body string) string {
	filename := filepath.Join(t.TempDir(), name)
	// 0o666 (before umask) is the mode os.Create would have used.
	err := os.WriteFile(filename, []byte(body), 0o666)
	require.NoError(t, err)
	return filename
}
// GenerateNotebookTasks returns one notebook task per entry in versions.
// Every task runs the notebook at notebookPath on a new cluster with one
// worker of type nodeTypeId, using that entry as the Spark version. The task
// key is "notebook_" followed by the version with dots replaced by
// underscores.
func GenerateNotebookTasks(notebookPath string, versions []string, nodeTypeId string) []jobs.SubmitTask {
	tasks := make([]jobs.SubmitTask, 0, len(versions))
	for _, version := range versions {
		tasks = append(tasks, jobs.SubmitTask{
			TaskKey: fmt.Sprintf("notebook_%s", strings.ReplaceAll(version, ".", "_")),
			NotebookTask: &jobs.NotebookTask{
				NotebookPath: notebookPath,
			},
			NewCluster: &compute.ClusterSpec{
				SparkVersion:     version,
				NumWorkers:       1,
				NodeTypeId:       nodeTypeId,
				DataSecurityMode: compute.DataSecurityModeUserIsolation,
			},
		})
	}
	return tasks
}
// GenerateSparkPythonTasks returns one Spark Python task per entry in
// versions. Every task runs the Python file at notebookPath on a new cluster
// with one worker of type nodeTypeId, using that entry as the Spark version.
// The task key is "spark_" followed by the version with dots replaced by
// underscores.
func GenerateSparkPythonTasks(notebookPath string, versions []string, nodeTypeId string) []jobs.SubmitTask {
	tasks := make([]jobs.SubmitTask, 0, len(versions))
	for _, version := range versions {
		tasks = append(tasks, jobs.SubmitTask{
			TaskKey: fmt.Sprintf("spark_%s", strings.ReplaceAll(version, ".", "_")),
			SparkPythonTask: &jobs.SparkPythonTask{
				PythonFile: notebookPath,
			},
			NewCluster: &compute.ClusterSpec{
				SparkVersion:     version,
				NumWorkers:       1,
				NodeTypeId:       nodeTypeId,
				DataSecurityMode: compute.DataSecurityModeUserIsolation,
			},
		})
	}
	return tasks
}
// GenerateWheelTasks returns one Python wheel task per entry in versions.
// Every task runs entry point "run" of package "my_test_code" with the wheel
// at wheelPath attached as a library, on a new cluster with one worker of type
// nodeTypeId and that entry as the Spark version. The task key is "whl_"
// followed by the version with dots replaced by underscores.
func GenerateWheelTasks(wheelPath string, versions []string, nodeTypeId string) []jobs.SubmitTask {
	tasks := make([]jobs.SubmitTask, 0, len(versions))
	for _, version := range versions {
		tasks = append(tasks, jobs.SubmitTask{
			TaskKey: fmt.Sprintf("whl_%s", strings.ReplaceAll(version, ".", "_")),
			PythonWheelTask: &jobs.PythonWheelTask{
				PackageName: "my_test_code",
				EntryPoint:  "run",
			},
			NewCluster: &compute.ClusterSpec{
				SparkVersion:     version,
				NumWorkers:       1,
				NodeTypeId:       nodeTypeId,
				DataSecurityMode: compute.DataSecurityModeUserIsolation,
			},
			Libraries: []compute.Library{
				{Whl: wheelPath},
			},
		})
	}
	return tasks
}
// TemporaryWorkspaceDir creates a randomly named directory under the current
// user's /Users folder in the workspace and returns its path. The directory is
// removed recursively on test completion; a failure to remove it is only
// logged.
func TemporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string {
	ctx := context.Background()

	me, err := w.CurrentUser.Me(ctx)
	require.NoError(t, err)

	basePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("integration-test-wsfs-"))
	t.Logf("Creating %s", basePath)
	require.NoError(t, w.Workspace.MkdirsByPath(ctx, basePath))

	// Remove test directory on test completion.
	t.Cleanup(func() {
		t.Logf("Removing %s", basePath)
		deleteErr := w.Workspace.Delete(ctx, workspace.Delete{
			Path:      basePath,
			Recursive: true,
		})
		// A directory that is already gone needs no further attention.
		if deleteErr != nil && !apierr.IsMissing(deleteErr) {
			t.Logf("Unable to remove temporary workspace directory %s: %#v", basePath, deleteErr)
		}
	})
	return basePath
}
// TemporaryDbfsDir creates a randomly named directory under /tmp on DBFS and
// returns its path. The directory is removed recursively on test completion;
// a failure to remove it is only logged.
func TemporaryDbfsDir(t *testing.T, w *databricks.WorkspaceClient) string {
	ctx := context.Background()

	path := fmt.Sprintf("/tmp/%s", RandomName("integration-test-dbfs-"))
	t.Logf("Creating DBFS folder:%s", path)
	require.NoError(t, w.Dbfs.MkdirsByPath(ctx, path))

	t.Cleanup(func() {
		t.Logf("Removing DBFS folder:%s", path)
		deleteErr := w.Dbfs.Delete(ctx, files.Delete{
			Path:      path,
			Recursive: true,
		})
		// A directory that is already gone needs no further attention.
		if deleteErr != nil && !apierr.IsMissing(deleteErr) {
			t.Logf("unable to remove temporary dbfs directory %s: %#v", path, deleteErr)
		}
	})
	return path
}
// temporaryUcVolume creates a randomly named schema in the catalog "main" and
// a managed volume called "my-volume" inside it, and returns the volume's path
// under /Volumes.
//
// Both are deleted on test completion. Cleanups run in reverse order of
// registration, so the volume is deleted before the schema. As in the other
// temporary-resource helpers in this file, a failed delete is logged rather
// than silently dropped, unless the resource is already missing.
func temporaryUcVolume(t *testing.T, w *databricks.WorkspaceClient) string {
	ctx := context.Background()

	// Create a schema
	schema, err := w.Schemas.Create(ctx, catalog.CreateSchema{
		CatalogName: "main",
		Name:        RandomName("test-schema-"),
	})
	require.NoError(t, err)
	t.Cleanup(func() {
		err := w.Schemas.Delete(ctx, catalog.DeleteSchemaRequest{
			FullName: schema.FullName,
		})
		if err == nil || apierr.IsMissing(err) {
			return
		}
		t.Logf("unable to remove temporary schema %s: %#v", schema.FullName, err)
	})

	// Create a volume
	volume, err := w.Volumes.Create(ctx, catalog.CreateVolumeRequestContent{
		CatalogName: "main",
		SchemaName:  schema.Name,
		Name:        "my-volume",
		VolumeType:  catalog.VolumeTypeManaged,
	})
	require.NoError(t, err)
	t.Cleanup(func() {
		err := w.Volumes.Delete(ctx, catalog.DeleteVolumeRequest{
			Name: volume.FullName,
		})
		if err == nil || apierr.IsMissing(err) {
			return
		}
		t.Logf("unable to remove temporary volume %s: %#v", volume.FullName, err)
	})

	return path.Join("/Volumes", "main", schema.Name, volume.Name)
}
// TemporaryRepo creates a repo from https://github.com/databricks/cli at a
// randomly named path under the current user's /Repos folder and returns that
// path. The repo is deleted on test completion; a failure to delete it is only
// logged.
func TemporaryRepo(t *testing.T, w *databricks.WorkspaceClient) string {
	ctx := context.Background()

	me, err := w.CurrentUser.Me(ctx)
	require.NoError(t, err)

	repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("integration-test-repo-"))
	t.Logf("Creating repo:%s", repoPath)
	repoInfo, err := w.Repos.Create(ctx, workspace.CreateRepo{
		Url:      "https://github.com/databricks/cli",
		Provider: "github",
		Path:     repoPath,
	})
	require.NoError(t, err)

	t.Cleanup(func() {
		t.Logf("Removing repo: %s", repoPath)
		deleteErr := w.Repos.Delete(ctx, workspace.DeleteRepoRequest{
			RepoId: repoInfo.Id,
		})
		// A repo that is already gone needs no further attention.
		if deleteErr != nil && !apierr.IsMissing(deleteErr) {
			t.Logf("unable to remove repo %s: %#v", repoPath, deleteErr)
		}
	})
	return repoPath
}
// GetNodeTypeId maps a cloud environment name to a node type ID:
// "gcp" yields "n1-standard-4", "aws" and "ucws" yield "i3.xlarge" and
// anything else yields "Standard_DS4_v2".
func GetNodeTypeId(env string) string {
	switch env {
	case "gcp":
		return "n1-standard-4"
	case "aws", "ucws":
		// aws-prod-ucws has CLOUD_ENV set to "ucws"
		return "i3.xlarge"
	default:
		return "Standard_DS4_v2"
	}
}
// setupLocalFiler returns a local filer rooted at a fresh temporary directory
// of the test, together with the slash-separated path of that directory.
// The test is skipped when CLOUD_ENV is not set.
func setupLocalFiler(t *testing.T) (filer.Filer, string) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	root := t.TempDir()
	client, err := filer.NewLocalClient(root)
	require.NoError(t, err)

	return client, path.Join(filepath.ToSlash(root))
}
// setupWsfsFiler returns a workspace files filer rooted at a temporary
// workspace directory, together with the path of that directory.
// The test is skipped when CLOUD_ENV is not set, or when a probe read through
// the filer is answered with an HTTP 400 API error.
func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	w := databricks.Must(databricks.NewWorkspaceClient())
	tmpdir := TemporaryWorkspaceDir(t, w)
	f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
	require.NoError(t, err)

	// Check if we can use this API here, skip test if we cannot.
	_, err = f.Read(context.Background(), "we_use_this_call_to_test_if_this_api_is_enabled")
	var apiErr *apierr.APIError
	if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusBadRequest {
		t.Skip(apiErr.Message)
	}
	return f, tmpdir
}
// setupWsfsExtensionsFiler returns a workspace files extensions filer rooted
// at a temporary workspace directory, together with the path of that
// directory. The test is skipped when CLOUD_ENV is not set.
func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	client := databricks.Must(databricks.NewWorkspaceClient())
	root := TemporaryWorkspaceDir(t, client)

	extFiler, err := filer.NewWorkspaceFilesExtensionsClient(client, root)
	require.NoError(t, err)
	return extFiler, root
}
// setupDbfsFiler returns a DBFS filer rooted at a temporary DBFS directory,
// together with the path of that directory joined onto "dbfs:/".
// The test is skipped when CLOUD_ENV is not set.
func setupDbfsFiler(t *testing.T) (filer.Filer, string) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	client, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	root := TemporaryDbfsDir(t, client)
	dbfsFiler, err := filer.NewDbfsClient(client, root)
	require.NoError(t, err)

	return dbfsFiler, path.Join("dbfs:/", root)
}
// setupUcVolumesFiler returns a files filer rooted at a temporary UC volume,
// together with the path of that volume joined onto "dbfs:/".
// The test is skipped when CLOUD_ENV or TEST_METASTORE_ID is not set.
func setupUcVolumesFiler(t *testing.T) (filer.Filer, string) {
	t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))

	if metastoreID := os.Getenv("TEST_METASTORE_ID"); metastoreID == "" {
		t.Skip("Skipping tests that require a UC Volume when metastore id is not set.")
	}

	client, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	root := temporaryUcVolume(t, client)
	volumeFiler, err := filer.NewFilesClient(client, root)
	require.NoError(t, err)

	return volumeFiler, path.Join("dbfs:/", root)
}