// databricks-cli/internal/bundle/generate_job_test.go

package bundle

import (
	"context"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"
	"testing"

	"github.com/databricks/cli/internal"
	"github.com/databricks/cli/internal/testutil"
	"github.com/databricks/cli/libs/filer"
	"github.com/databricks/databricks-sdk-go"
	"github.com/databricks/databricks-sdk-go/service/compute"
	"github.com/databricks/databricks-sdk-go/service/jobs"
	"github.com/google/uuid"
	"github.com/stretchr/testify/require"
)

// TestAccGenerateFromExistingJobAndDeploy is an end-to-end test for
// `bundle generate job`. It:
//
//  1. initializes a bundle from the "with_includes" template,
//  2. creates a notebook job in the workspace via createTestJob,
//  3. runs `bundle generate job` against that job, pointing the config and
//     source output at the bundle's "resources" and "src" directories,
//  4. checks that the notebook was downloaded and that the generated YAML
//     contains the expected task and cluster settings,
//  5. deploys and then destroys the bundle.
//
// The CLOUD_ENV value is read through internal.GetEnvOrSkipTest; judging by
// that helper's name the test is skipped when the variable is unset.
func TestAccGenerateFromExistingJobAndDeploy(t *testing.T) {
	env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(env)

	uniqueId := uuid.New().String()
	bundleRoot, err := initTestTemplate(t, "with_includes", map[string]any{
		"unique_id": uniqueId,
	})
	require.NoError(t, err)

	jobId := createTestJob(t)
	t.Cleanup(func() {
		// destroyJob asserts on its own errors and returns nothing, so there
		// is no error to check here. Re-checking the captured outer err would
		// only re-report whatever the last statement of the test produced.
		destroyJob(t, jobId)
	})

	t.Setenv("BUNDLE_ROOT", bundleRoot)
	c := internal.NewCobraTestRunner(t, "bundle", "generate", "job",
		"--existing-job-id", fmt.Sprint(jobId),
		"--config-dir", filepath.Join(bundleRoot, "resources"),
		"--source-dir", filepath.Join(bundleRoot, "src"))
	_, _, err = c.Run()
	require.NoError(t, err)

	// The notebook referenced by the job must have been downloaded.
	_, err = os.Stat(filepath.Join(bundleRoot, "src", "test.py"))
	require.NoError(t, err)

	// Exactly one job configuration file must have been generated.
	matches, err := filepath.Glob(filepath.Join(bundleRoot, "resources", "job_generated_job_*.yml"))
	require.NoError(t, err)
	require.Len(t, matches, 1)

	// check the content of generated yaml
	data, err := os.ReadFile(matches[0])
	require.NoError(t, err)
	generatedYaml := string(data)
	require.Contains(t, generatedYaml, "notebook_task:")
	// filepath.Join keeps the expected relative path OS-specific so the
	// assertion also matches on Windows.
	require.Contains(t, generatedYaml, fmt.Sprintf("notebook_path: %s", filepath.Join("..", "src", "test.py")))
	require.Contains(t, generatedYaml, "task_key: test")
	require.Contains(t, generatedYaml, "new_cluster:")
	require.Contains(t, generatedYaml, "spark_version: 13.3.x-scala2.12")
	require.Contains(t, generatedYaml, "num_workers: 1")

	err = deployBundle(t, bundleRoot)
	require.NoError(t, err)

	err = destroyBundle(t, bundleRoot)
	require.NoError(t, err)
}

// createTestJob uploads a small notebook to a temporary workspace directory
// and creates a single-task job that references it. The task runs on a new
// one-worker cluster whose node type is chosen per cloud. It returns the ID
// of the created job; any failure aborts the calling test.
func createTestJob(t *testing.T) int64 {
	t.Helper()

	// Pick a node type valid for the cloud under test. For a cloud not listed
	// here nodeTypeId stays empty.
	var nodeTypeId string
	switch testutil.GetCloud(t) {
	case testutil.AWS:
		nodeTypeId = "i3.xlarge"
	case testutil.Azure:
		nodeTypeId = "Standard_DS4_v2"
	case testutil.GCP:
		nodeTypeId = "n1-standard-4"
	}

	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	ctx := context.Background()
	tmpdir := internal.TemporaryWorkspaceDir(t, w)
	f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
	require.NoError(t, err)

	// The leading "# Databricks notebook source" line marks the file as a
	// notebook; the body must be valid Python.
	err = f.Write(ctx, "test.py", strings.NewReader("# Databricks notebook source\nprint('Hello world!')"))
	require.NoError(t, err)

	resp, err := w.Jobs.Create(ctx, jobs.CreateJob{
		Name: internal.RandomName("generated-job-"),
		Tasks: []jobs.Task{
			{
				TaskKey: "test",
				NewCluster: &compute.ClusterSpec{
					SparkVersion: "13.3.x-scala2.12",
					NumWorkers:   1,
					NodeTypeId:   nodeTypeId,
				},
				NotebookTask: &jobs.NotebookTask{
					// Workspace paths are slash-separated, hence path.Join.
					// The notebook is referenced without the ".py" suffix —
					// presumably the workspace drops it on import; confirm.
					NotebookPath: path.Join(tmpdir, "test"),
				},
			},
		},
	})
	require.NoError(t, err)

	return resp.JobId
}

// destroyJob deletes the job with the given ID using a freshly created
// workspace client. Failure to create the client or to delete the job fails
// the calling test.
func destroyJob(t *testing.T, jobId int64) {
	t.Helper()

	w, err := databricks.NewWorkspaceClient()
	require.NoError(t, err)

	ctx := context.Background()
	err = w.Jobs.Delete(ctx, jobs.DeleteJob{
		JobId: jobId,
	})
	require.NoError(t, err)
}
Added `databricks bundle generate job` command (#1043) ## Changes Now it's possible to generate bundle configuration for existing job. For now it only supports jobs with notebook tasks. It will download notebooks referenced in the job tasks and generate bundle YAML config for this job which can be included in larger bundle. ## Tests Running command manually Example of generated config ``` resources: jobs: job_128737545467921: name: Notebook job format: MULTI_TASK tasks: - task_key: as_notebook existing_cluster_id: 0704-xxxxxx-yyyyyyy notebook_task: base_parameters: bundle_root: /Users/andrew.nester@databricks.com/.bundle/job_with_module_imports/development/files notebook_path: ./entry_notebook.py source: WORKSPACE run_if: ALL_SUCCESS max_concurrent_runs: 1 ``` ## Tests Manual (on our last 100 jobs) + added end-to-end test ``` --- PASS: TestAccGenerateFromExistingJobAndDeploy (50.91s) PASS coverage: 61.5% of statements in ./... ok github.com/databricks/cli/internal/bundle 51.209s coverage: 61.5% of statements in ./... ``` 2024-01-17 14:26:33 +00:00			`package bundle`

			`import (`
			`"context"`
			`"fmt"`
			`"os"`
			`"path"`
			`"path/filepath"`
			`"strings"`
			`"testing"`

			`"github.com/databricks/cli/internal"`
			`"github.com/databricks/cli/internal/testutil"`
			`"github.com/databricks/cli/libs/filer"`
			`"github.com/databricks/databricks-sdk-go"`
			`"github.com/databricks/databricks-sdk-go/service/compute"`
			`"github.com/databricks/databricks-sdk-go/service/jobs"`
			`"github.com/google/uuid"`
			`"github.com/stretchr/testify/require"`
			`)`

			`func TestAccGenerateFromExistingJobAndDeploy(t *testing.T) {`
			`env := internal.GetEnvOrSkipTest(t, "CLOUD_ENV")`
			`t.Log(env)`

			`uniqueId := uuid.New().String()`
			`bundleRoot, err := initTestTemplate(t, "with_includes", map[string]any{`
			`"unique_id": uniqueId,`
			`})`
			`require.NoError(t, err)`

			`jobId := createTestJob(t)`
			`t.Cleanup(func() {`
			`destroyJob(t, jobId)`
			`require.NoError(t, err)`
			`})`

			`t.Setenv("BUNDLE_ROOT", bundleRoot)`
			`c := internal.NewCobraTestRunner(t, "bundle", "generate", "job",`
			`"--existing-job-id", fmt.Sprint(jobId),`
			`"--config-dir", filepath.Join(bundleRoot, "resources"),`
			`"--source-dir", filepath.Join(bundleRoot, "src"))`
			`_, _, err = c.Run()`
			`require.NoError(t, err)`

			`_, err = os.Stat(filepath.Join(bundleRoot, "src", "test.py"))`
			`require.NoError(t, err)`

			`matches, err := filepath.Glob(filepath.Join(bundleRoot, "resources", "job_generated_job_*.yml"))`
			`require.NoError(t, err)`
			`require.Len(t, matches, 1)`

			`// check the content of generated yaml`
			`data, err := os.ReadFile(matches[0])`
			`require.NoError(t, err)`
			`generatedYaml := string(data)`
			`require.Contains(t, generatedYaml, "notebook_task:")`
Fixed path matching for Windows in generate job test (#1132) ## Changes Fixed path matching for Windows in generate job test 2024-01-19 08:05:59 +00:00			`require.Contains(t, generatedYaml, fmt.Sprintf("notebook_path: %s", filepath.Join("..", "src", "test.py")))`
Added `databricks bundle generate job` command (#1043) ## Changes Now it's possible to generate bundle configuration for existing job. For now it only supports jobs with notebook tasks. It will download notebooks referenced in the job tasks and generate bundle YAML config for this job which can be included in larger bundle. ## Tests Running command manually Example of generated config ``` resources: jobs: job_128737545467921: name: Notebook job format: MULTI_TASK tasks: - task_key: as_notebook existing_cluster_id: 0704-xxxxxx-yyyyyyy notebook_task: base_parameters: bundle_root: /Users/andrew.nester@databricks.com/.bundle/job_with_module_imports/development/files notebook_path: ./entry_notebook.py source: WORKSPACE run_if: ALL_SUCCESS max_concurrent_runs: 1 ``` ## Tests Manual (on our last 100 jobs) + added end-to-end test ``` --- PASS: TestAccGenerateFromExistingJobAndDeploy (50.91s) PASS coverage: 61.5% of statements in ./... ok github.com/databricks/cli/internal/bundle 51.209s coverage: 61.5% of statements in ./... ``` 2024-01-17 14:26:33 +00:00			`require.Contains(t, generatedYaml, "task_key: test")`
			`require.Contains(t, generatedYaml, "new_cluster:")`
			`require.Contains(t, generatedYaml, "spark_version: 13.3.x-scala2.12")`
			`require.Contains(t, generatedYaml, "num_workers: 1")`

			`err = deployBundle(t, bundleRoot)`
			`require.NoError(t, err)`

			`err = destroyBundle(t, bundleRoot)`
			`require.NoError(t, err)`

			`}`

			`func createTestJob(t *testing.T) int64 {`
			`var nodeTypeId string`
			`switch testutil.GetCloud(t) {`
			`case testutil.AWS:`
			`nodeTypeId = "i3.xlarge"`
			`case testutil.Azure:`
			`nodeTypeId = "Standard_DS4_v2"`
			`case testutil.GCP:`
			`nodeTypeId = "n1-standard-4"`
			`}`

			`w, err := databricks.NewWorkspaceClient()`
			`require.NoError(t, err)`

			`ctx := context.Background()`
			`tmpdir := internal.TemporaryWorkspaceDir(t, w)`
			`f, err := filer.NewWorkspaceFilesClient(w, tmpdir)`
			`require.NoError(t, err)`

			`err = f.Write(ctx, "test.py", strings.NewReader("# Databricks notebook source\nprint('Hello world!'))"))`
			`require.NoError(t, err)`

			`resp, err := w.Jobs.Create(ctx, jobs.CreateJob{`
			`Name: internal.RandomName("generated-job-"),`
			`Tasks: []jobs.Task{`
			`{`
			`TaskKey: "test",`
			`NewCluster: &compute.ClusterSpec{`
			`SparkVersion: "13.3.x-scala2.12",`
			`NumWorkers: 1,`
			`NodeTypeId: nodeTypeId,`
			`},`
			`NotebookTask: &jobs.NotebookTask{`
			`NotebookPath: path.Join(tmpdir, "test"),`
			`},`
			`},`
			`},`
			`})`
			`require.NoError(t, err)`

			`return resp.JobId`
			`}`

			`func destroyJob(t *testing.T, jobId int64) {`
			`w, err := databricks.NewWorkspaceClient()`
			`require.NoError(t, err)`

			`ctx := context.Background()`
			`err = w.Jobs.Delete(ctx, jobs.DeleteJob{`
			`JobId: jobId,`
			`})`
			`require.NoError(t, err)`
			`}`