databricks-cli/integration/bundle/helpers_test.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

196 lines
6.0 KiB
Go
Raw Normal View History

package bundle_test
import (
"bytes"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/internal/testcli"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/folders"
"github.com/databricks/cli/libs/template"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/require"
)
const defaultSparkVersion = "13.3.x-snapshot-scala2.12"
func initTestTemplate(t testutil.TestingT, ctx context.Context, templateName string, config map[string]any) string {
bundleRoot := t.TempDir()
return initTestTemplateWithBundleRoot(t, ctx, templateName, config, bundleRoot)
}
func initTestTemplateWithBundleRoot(t testutil.TestingT, ctx context.Context, templateName string, config map[string]any, bundleRoot string) string {
templateRoot := filepath.Join("bundles", templateName)
configFilePath := writeConfigFile(t, config)
ctx = root.SetWorkspaceClient(ctx, nil)
cmd := cmdio.NewIO(ctx, flags.OutputJSON, strings.NewReader(""), os.Stdout, os.Stderr, "", "bundles")
ctx = cmdio.InContext(ctx, cmd)
Refactor `bundle init` (#2074) ## Summary of changes This PR introduces three new abstractions: 1. `Resolver`: Resolves which reader and writer to use for a template. 2. `Writer`: Writes a template project to disk. Prompts the user if necessary. 3. `Reader`: Reads a template specification from disk, built into the CLI or from GitHub. Introducing these abstractions helps decouple reading a template from writing it. When I tried adding telemetry for the `bundle init` command, I noticed that the code in `cmd/init.go` was getting convoluted and hard to test. A future change could have accidentally logged PII when a user initialised a custom template. Hedging against that risk is important here because we use a generic untyped `map<string, string>` representation in the backend to log telemetry for the `databricks bundle init`. Otherwise, we risk accidentally breaking our compliance with our centralization requirements. ### Details After this PR there are two classes of templates that can be initialized: 1. A `databricks` template: This could be a builtin template or a template outside the CLI like mlops-stacks, which is still owned and managed by Databricks. These templates log their telemetry arguments and template name. 2. A `custom` template: These are templates created by and managed by the end user. In these templates we do not log the template name and args. Instead a generic placeholder string of "custom" is logged in our telemetry system. NOTE: The functionality of the `databricks bundle init` command remains the same after this PR. Only the internal abstractions used are changed. ## Tests New unit tests. Existing golden and unit tests. Also a fair bit of manual testing.
2025-01-20 12:09:28 +00:00
r := template.Resolver{
TemplatePathOrUrl: templateRoot,
ConfigFile: configFilePath,
OutputDir: bundleRoot,
}
tmpl, err := r.Resolve(ctx)
require.NoError(t, err)
Refactor `bundle init` (#2074) ## Summary of changes This PR introduces three new abstractions: 1. `Resolver`: Resolves which reader and writer to use for a template. 2. `Writer`: Writes a template project to disk. Prompts the user if necessary. 3. `Reader`: Reads a template specification from disk, built into the CLI or from GitHub. Introducing these abstractions helps decouple reading a template from writing it. When I tried adding telemetry for the `bundle init` command, I noticed that the code in `cmd/init.go` was getting convoluted and hard to test. A future change could have accidentally logged PII when a user initialised a custom template. Hedging against that risk is important here because we use a generic untyped `map<string, string>` representation in the backend to log telemetry for the `databricks bundle init`. Otherwise, we risk accidentally breaking our compliance with our centralization requirements. ### Details After this PR there are two classes of templates that can be initialized: 1. A `databricks` template: This could be a builtin template or a template outside the CLI like mlops-stacks, which is still owned and managed by Databricks. These templates log their telemetry arguments and template name. 2. A `custom` template: These are templates created by and managed by the end user. In these templates we do not log the template name and args. Instead a generic placeholder string of "custom" is logged in our telemetry system. NOTE: The functionality of the `databricks bundle init` command remains the same after this PR. Only the internal abstractions used are changed. ## Tests New unit tests. Existing golden and unit tests. Also a fair bit of manual testing.
2025-01-20 12:09:28 +00:00
defer tmpl.Reader.Cleanup(ctx)
err = tmpl.Writer.Materialize(ctx, tmpl.Reader)
require.NoError(t, err)
Refactor `bundle init` (#2074) ## Summary of changes This PR introduces three new abstractions: 1. `Resolver`: Resolves which reader and writer to use for a template. 2. `Writer`: Writes a template project to disk. Prompts the user if necessary. 3. `Reader`: Reads a template specification from disk, built into the CLI or from GitHub. Introducing these abstractions helps decouple reading a template from writing it. When I tried adding telemetry for the `bundle init` command, I noticed that the code in `cmd/init.go` was getting convoluted and hard to test. A future change could have accidentally logged PII when a user initialised a custom template. Hedging against that risk is important here because we use a generic untyped `map<string, string>` representation in the backend to log telemetry for the `databricks bundle init`. Otherwise, we risk accidentally breaking our compliance with our centralization requirements. ### Details After this PR there are two classes of templates that can be initialized: 1. A `databricks` template: This could be a builtin template or a template outside the CLI like mlops-stacks, which is still owned and managed by Databricks. These templates log their telemetry arguments and template name. 2. A `custom` template: These are templates created by and managed by the end user. In these templates we do not log the template name and args. Instead a generic placeholder string of "custom" is logged in our telemetry system. NOTE: The functionality of the `databricks bundle init` command remains the same after this PR. Only the internal abstractions used are changed. ## Tests New unit tests. Existing golden and unit tests. Also a fair bit of manual testing.
2025-01-20 12:09:28 +00:00
return bundleRoot
}
func writeConfigFile(t testutil.TestingT, config map[string]any) string {
bytes, err := json.Marshal(config)
require.NoError(t, err)
dir := t.TempDir()
filepath := filepath.Join(dir, "config.json")
t.Log("Configuration for template: ", string(bytes))
err = os.WriteFile(filepath, bytes, 0o644)
require.NoError(t, err)
return filepath
}
func validateBundle(t testutil.TestingT, ctx context.Context, path string) ([]byte, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
c := testcli.NewRunner(t, ctx, "bundle", "validate", "--output", "json")
stdout, _, err := c.Run()
return stdout.Bytes(), err
}
func mustValidateBundle(t testutil.TestingT, ctx context.Context, path string) []byte {
data, err := validateBundle(t, ctx, path)
require.NoError(t, err)
return data
}
func unmarshalConfig(t testutil.TestingT, data []byte) *bundle.Bundle {
bundle := &bundle.Bundle{}
err := json.Unmarshal(data, &bundle.Config)
require.NoError(t, err)
return bundle
}
func deployBundle(t testutil.TestingT, ctx context.Context, path string) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
c := testcli.NewRunner(t, ctx, "bundle", "deploy", "--force-lock", "--auto-approve")
_, _, err := c.Run()
require.NoError(t, err)
}
func deployBundleWithArgsErr(t testutil.TestingT, ctx context.Context, path string, args ...string) (string, string, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
args = append([]string{"bundle", "deploy"}, args...)
c := testcli.NewRunner(t, ctx, args...)
stdout, stderr, err := c.Run()
return stdout.String(), stderr.String(), err
}
func deployBundleWithArgs(t testutil.TestingT, ctx context.Context, path string, args ...string) (string, string) {
stdout, stderr, err := deployBundleWithArgsErr(t, ctx, path, args...)
require.NoError(t, err)
return stdout, stderr
}
func deployBundleWithFlags(t testutil.TestingT, ctx context.Context, path string, flags []string) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
args := []string{"bundle", "deploy", "--force-lock"}
args = append(args, flags...)
c := testcli.NewRunner(t, ctx, args...)
_, _, err := c.Run()
require.NoError(t, err)
}
func runResource(t testutil.TestingT, ctx context.Context, path, key string) (string, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
ctx = cmdio.NewContext(ctx, cmdio.Default())
c := testcli.NewRunner(t, ctx, "bundle", "run", key)
stdout, _, err := c.Run()
return stdout.String(), err
}
Added support for Databricks Apps in DABs (#1928) ## Changes Now it's possible to configure new `app` resource in bundle and point it to the custom `source_code_path` location where Databricks App code is defined. On `databricks bundle deploy` DABs will create an app. All consecutive `databricks bundle deploy` execution will update an existing app if there are any updated On `databricks bundle run <my_app>` DABs will execute app deployment. If the app is not started yet, it will start the app first. ### Bundle configuration ``` bundle: name: apps variables: my_job_id: description: "ID of job to run app" lookup: job: "My Job" databricks_name: description: "Name for app user" additional_flags: description: "Additional flags to run command app" default: "" my_app_config: type: complex description: "Configuration for my Databricks App" default: command: - flask - --app - hello - run - ${var.additional_flags} env: - name: DATABRICKS_NAME value: ${var.databricks_name} resources: apps: my_app: name: "anester-app" # required and has to be unique description: "My App" source_code_path: ./app # required and points to location of app code config: ${var.my_app_config} resources: - name: "my-job" description: "A job for app to be able to run" job: id: ${var.my_job_id} permission: "CAN_MANAGE_RUN" permissions: - user_name: "foo@bar.com" level: "CAN_VIEW" - service_principal_name: "my_sp" level: "CAN_MANAGE" targets: dev: variables: databricks_name: "Andrew (from dev)" additional_flags: --debug prod: variables: databricks_name: "Andrew (from prod)" ``` ### Execution 1. `databricks bundle deploy -t dev` 2. `databricks bundle run my_app -t dev` **If app is started** ``` ✓ Getting the status of the app my-app ✓ App is in RUNNING state ✓ Preparing source code for new app deployment. ✓ Deployment is pending ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` **If app is not started** ``` ✓ Getting the status of the app my-app ✓ App is in UNAVAILABLE state ✓ Starting the app my-app ✓ App is starting... .... ✓ App is starting... ✓ App is started! ✓ Preparing source code for new app deployment. ✓ Downloading source code from /Workspace/Users/... ✓ Starting app with command: flask --app hello run --debug ✓ App started successfully You can access the app at <app-url> ``` ## Tests Added unit and config tests + manual test. ``` --- PASS: TestAccDeployBundleWithApp (404.59s) PASS coverage: 36.8% of statements in ./... ok github.com/databricks/cli/internal/bundle 405.035s coverage: 36.8% of statements in ./... ```
2025-01-13 16:43:48 +00:00
func runResourceWithStderr(t testutil.TestingT, ctx context.Context, path, key string) (string, string) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
ctx = cmdio.NewContext(ctx, cmdio.Default())
c := testcli.NewRunner(t, ctx, "bundle", "run", key)
stdout, stderr, err := c.Run()
require.NoError(t, err)
return stdout.String(), stderr.String()
}
func runResourceWithParams(t testutil.TestingT, ctx context.Context, path, key string, params ...string) (string, error) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
ctx = cmdio.NewContext(ctx, cmdio.Default())
args := make([]string, 0)
args = append(args, "bundle", "run", key)
args = append(args, params...)
c := testcli.NewRunner(t, ctx, args...)
stdout, _, err := c.Run()
return stdout.String(), err
}
func destroyBundle(t testutil.TestingT, ctx context.Context, path string) {
ctx = env.Set(ctx, "BUNDLE_ROOT", path)
c := testcli.NewRunner(t, ctx, "bundle", "destroy", "--auto-approve")
_, _, err := c.Run()
require.NoError(t, err)
}
func getBundleRemoteRootPath(w *databricks.WorkspaceClient, t testutil.TestingT, uniqueId string) string {
// Compute root path for the bundle deployment
me, err := w.CurrentUser.Me(context.Background())
require.NoError(t, err)
root := fmt.Sprintf("/Workspace/Users/%s/.bundle/%s", me.UserName, uniqueId)
return root
}
func blackBoxRun(t testutil.TestingT, ctx context.Context, root string, args ...string) (stdout, stderr string) {
gitRoot, err := folders.FindDirWithLeaf(".", ".git")
require.NoError(t, err)
// Create the command
cmd := exec.Command("go", append([]string{"run", "main.go"}, args...)...)
cmd.Dir = gitRoot
// Configure the environment
ctx = env.Set(ctx, "BUNDLE_ROOT", root)
for key, value := range env.All(ctx) {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", key, value))
}
// Create buffers to capture output
var outBuffer, errBuffer bytes.Buffer
cmd.Stdout = &outBuffer
cmd.Stderr = &errBuffer
// Run the command
err = cmd.Run()
require.NoError(t, err)
// Get the output
stdout = outBuffer.String()
stderr = errBuffer.String()
return
}