From 6c57683dc6077282dd95e03b19396f602dd5d635 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Sat, 21 Sep 2024 08:36:47 +0200 Subject: [PATCH 1/5] Reduce time until the prompt is shown for bundle run (#1727) ## Summary Makes the `databricks bundle run` command use local state before showing the menu prompt, which makes it show more quickly. For large/busy workspaces this means the prompt can show 2-3 seconds earlier. --- cmd/bundle/run.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 63458f85..9ef5eb8f 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -55,13 +55,7 @@ task or a Python wheel task, the second example applies. return diags.Error() } - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - terraform.Interpolate(), - terraform.Write(), - terraform.StatePull(), - terraform.Load(terraform.ErrorOnEmptyState), - )) + diags = bundle.Apply(ctx, b, phases.Initialize()) if err := diags.Error(); err != nil { return err } @@ -84,6 +78,16 @@ task or a Python wheel task, the second example applies. return fmt.Errorf("expected a KEY of the resource to run") } + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.Interpolate(), + terraform.Write(), + terraform.StatePull(), + terraform.Load(terraform.ErrorOnEmptyState), + )) + if err := diags.Error(); err != nil { + return err + } + runner, err := run.Find(b, args[0]) if err != nil { return err From 7665c639bd34392f5d95c177b520d48b9ffa40f4 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 23 Sep 2024 11:52:04 +0200 Subject: [PATCH 2/5] Use Unity Catalog for pipelines in the default-python template (#1766) ## Summary Enables Unity Catalog for pipelines in the default template. Pipelines will default to non-Unity Catalog pipelines if a catalog is not specified. *Small caveat*: there are cases where admins lock down the default catalog of a workspace and don't allow the creation of a new schema there. If that happens, the pipeline would fail at runtime with a clear error indicating what happened. ("PERMISSION_DENIED: User does not have CREATE SCHEMA on Catalog 'main'."). I've seen this with an internal Databricks workspace, where creating new non-UC schemas wasn't locked down, but creation in the `main` was. ## Testing - Validated on a non-UC + UC workspace. The catalog selection logic here is the same as applied for the SQL templates. --- .../resources/{{.project_name}}_pipeline.yml.tmpl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl index 4b8f74d1..bf469046 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl @@ -3,6 +3,12 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline + {{- if eq default_catalog ""}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + {{- else}} + catalog: {{default_catalog}} + {{- end}} target: {{.project_name}}_${bundle.environment} libraries: - notebook: From ac80d3dfcb648c26c131ad21aec45449ac5c3b31 Mon Sep 17 00:00:00 2001 From: Ilia Babanov Date: Mon, 23 Sep 2024 12:09:11 +0200 Subject: [PATCH 3/5] Add verbose flag to the "bundle deploy" command (#1774) ## Changes - Extract sync output logic from `cmd/sync` into `lib/sync` - Add hidden `verbose` flag to the `bundle deploy` command, it's false by default and hidden from the `--help` output - Pass output handler to the `deploy/files/upload` mutator if the verbose option is true The was an idea to use in-place output overriding each past file sync event in the output, bit that wont work for the extension, since it doesn't display deploy logs in the terminal. Example output: ``` ~/tmp/defpy: ~/cli/cli bundle deploy --sync-progress Building defpy... Uploading defpy-0.0.1+20240917.112755-py3-none-any.whl... Uploading bundle files to /Users/ilia.babanov@databricks.com/.bundle/defpy/dev/files... Action: PUT: requirements-dev.txt, resources/defpy_pipeline.yml, pytest.ini, src/defpy/main.py, src/defpy/__init__.py, src/dlt_pipeline.ipynb, tests/main_test.py, src/notebook.ipynb, setup.py, resources/defpy_job.yml, .vscode/extensions.json, .vscode/settings.json, fixtures/.gitkeep, .vscode/__builtins__.pyi, README.md, .gitignore, databricks.yml Uploaded tests Uploaded resources Uploaded fixtures Uploaded .vscode Uploaded src/defpy Uploaded requirements-dev.txt Uploaded .gitignore Uploaded fixtures/.gitkeep Uploaded src/defpy/__init__.py Uploaded databricks.yml Uploaded README.md Uploaded setup.py Uploaded .vscode/__builtins__.pyi Uploaded .vscode/extensions.json Uploaded src/dlt_pipeline.ipynb Uploaded .vscode/settings.json Uploaded resources/defpy_job.yml Uploaded pytest.ini Uploaded src/defpy/main.py Uploaded tests/main_test.py Uploaded resources/defpy_pipeline.yml Uploaded src/notebook.ipynb Initial Sync Complete Deploying resources... Updating deployment state... Deployment complete! ``` Output example in the extension: Screenshot 2024-09-19 at 11 07 48 ## Tests Manually for the `sync` and `bundle deploy` commands + vscode extension sync and deploy flows --- bundle/deploy/files/upload.go | 18 ++++++++++---- bundle/phases/deploy.go | 5 ++-- cmd/bundle/deploy.go | 14 ++++++++++- cmd/sync/sync.go | 38 ++++++++++++++--------------- {cmd => libs}/sync/output.go | 6 ++--- libs/sync/sync.go | 45 +++++++++++++++++++++++++---------- 6 files changed, 82 insertions(+), 44 deletions(-) rename {cmd => libs}/sync/output.go (83%) diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index 2c126623..77b83611 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -8,9 +8,12 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/sync" ) -type upload struct{} +type upload struct { + outputHandler sync.OutputHandler +} func (m *upload) Name() string { return "files.Upload" @@ -18,11 +21,18 @@ func (m *upload) Name() string { func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath)) - sync, err := GetSync(ctx, bundle.ReadOnly(b)) + opts, err := GetSyncOptions(ctx, bundle.ReadOnly(b)) if err != nil { return diag.FromErr(err) } + opts.OutputHandler = m.outputHandler + sync, err := sync.New(ctx, *opts) + if err != nil { + return diag.FromErr(err) + } + defer sync.Close() + b.Files, err = sync.RunOnce(ctx) if err != nil { return diag.FromErr(err) @@ -32,6 +42,6 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return nil } -func Upload() bundle.Mutator { - return &upload{} +func Upload(outputHandler sync.OutputHandler) bundle.Mutator { + return &upload{outputHandler} } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 49544227..097c561e 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/sync" terraformlib "github.com/databricks/cli/libs/terraform" tfjson "github.com/hashicorp/terraform-json" ) @@ -128,7 +129,7 @@ properties such as the 'catalog' or 'storage' are changed:` } // The deploy phase deploys artifacts and resources. -func Deploy() bundle.Mutator { +func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // Core mutators that CRUD resources and modify deployment state. These // mutators need informed consent if they are potentially destructive. deployCore := bundle.Defer( @@ -157,7 +158,7 @@ func Deploy() bundle.Mutator { libraries.ExpandGlobReferences(), libraries.Upload(), python.TransformWheelTask(), - files.Upload(), + files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 1166875a..49231734 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" ) @@ -25,11 +26,15 @@ func newDeployCommand() *cobra.Command { var failOnActiveRuns bool var computeID string var autoApprove bool + var verbose bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") + cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") + // Verbose flag currently only affects file sync output, it's used by the vscode extension + cmd.Flags().MarkHidden("verbose") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -51,11 +56,18 @@ func newDeployCommand() *cobra.Command { return nil }) + var outputHandler sync.OutputHandler + if verbose { + outputHandler = func(ctx context.Context, c <-chan sync.Event) { + sync.TextOutput(ctx, c, cmd.OutOrStdout()) + } + } + diags = diags.Extend( bundle.Apply(ctx, b, bundle.Seq( phases.Initialize(), phases.Build(), - phases.Deploy(), + phases.Deploy(outputHandler), )), ) } diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index 23a4c018..2092d9e3 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "path/filepath" - stdsync "sync" "time" "github.com/databricks/cli/bundle" @@ -46,6 +45,21 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn return nil, flag.ErrHelp } + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + + var outputHandler sync.OutputHandler + if outputFunc != nil { + outputHandler = func(ctx context.Context, events <-chan sync.Event) { + outputFunc(ctx, events, cmd.OutOrStdout()) + } + } + opts := sync.SyncOptions{ LocalRoot: vfs.MustNew(args[0]), Paths: []string{"."}, @@ -62,6 +76,8 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn // exist and add it to the `.gitignore` file in the root. SnapshotBasePath: filepath.Join(args[0], ".databricks"), WorkspaceClient: root.WorkspaceClient(cmd.Context()), + + OutputHandler: outputHandler, } return &opts, nil } @@ -118,23 +134,7 @@ func New() *cobra.Command { if err != nil { return err } - - var outputFunc func(context.Context, <-chan sync.Event, io.Writer) - switch f.output { - case flags.OutputText: - outputFunc = textOutput - case flags.OutputJSON: - outputFunc = jsonOutput - } - - var wg stdsync.WaitGroup - if outputFunc != nil { - wg.Add(1) - go func() { - defer wg.Done() - outputFunc(ctx, s.Events(), cmd.OutOrStdout()) - }() - } + defer s.Close() if f.watch { err = s.RunContinuous(ctx) @@ -142,8 +142,6 @@ func New() *cobra.Command { _, err = s.RunOnce(ctx) } - s.Close() - wg.Wait() return err } diff --git a/cmd/sync/output.go b/libs/sync/output.go similarity index 83% rename from cmd/sync/output.go rename to libs/sync/output.go index 2785343f..c01b25ef 100644 --- a/cmd/sync/output.go +++ b/libs/sync/output.go @@ -5,12 +5,10 @@ import ( "context" "encoding/json" "io" - - "github.com/databricks/cli/libs/sync" ) // Read synchronization events and write them as JSON to the specified writer (typically stdout). -func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func JsonOutput(ctx context.Context, ch <-chan Event, w io.Writer) { enc := json.NewEncoder(w) for { select { @@ -31,7 +29,7 @@ func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { } // Read synchronization events and write them as text to the specified writer (typically stdout). -func textOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func TextOutput(ctx context.Context, ch <-chan Event, w io.Writer) { bw := bufio.NewWriter(w) for { diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 9eaebf2a..cc9c7394 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -3,6 +3,7 @@ package sync import ( "context" "fmt" + stdsync "sync" "time" "github.com/databricks/cli/libs/filer" @@ -15,6 +16,8 @@ import ( "github.com/databricks/databricks-sdk-go/service/iam" ) +type OutputHandler func(context.Context, <-chan Event) + type SyncOptions struct { LocalRoot vfs.Path Paths []string @@ -34,6 +37,8 @@ type SyncOptions struct { CurrentUser *iam.User Host string + + OutputHandler OutputHandler } type Sync struct { @@ -49,6 +54,10 @@ type Sync struct { // Synchronization progress events are sent to this event notifier. notifier EventNotifier seq int + + // WaitGroup is automatically created when an output handler is provided in the SyncOptions. + // Close call is required to ensure the output handler goroutine handles all events in time. + outputWaitGroup *stdsync.WaitGroup } // New initializes and returns a new [Sync] instance. @@ -106,31 +115,41 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { return nil, err } + var notifier EventNotifier + var outputWaitGroup = &stdsync.WaitGroup{} + if opts.OutputHandler != nil { + ch := make(chan Event, MaxRequestsInFlight) + notifier = &ChannelNotifier{ch} + outputWaitGroup.Add(1) + go func() { + defer outputWaitGroup.Done() + opts.OutputHandler(ctx, ch) + }() + } else { + notifier = &NopNotifier{} + } + return &Sync{ SyncOptions: &opts, - fileSet: fileSet, - includeFileSet: includeFileSet, - excludeFileSet: excludeFileSet, - snapshot: snapshot, - filer: filer, - notifier: &NopNotifier{}, - seq: 0, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + snapshot: snapshot, + filer: filer, + notifier: notifier, + outputWaitGroup: outputWaitGroup, + seq: 0, }, nil } -func (s *Sync) Events() <-chan Event { - ch := make(chan Event, MaxRequestsInFlight) - s.notifier = &ChannelNotifier{ch} - return ch -} - func (s *Sync) Close() { if s.notifier == nil { return } s.notifier.Close() s.notifier = nil + s.outputWaitGroup.Wait() } func (s *Sync) notifyStart(ctx context.Context, d diff) { From 56ed9bebf39b2ef6430e42dbb840684285217436 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 23 Sep 2024 12:42:34 +0200 Subject: [PATCH 4/5] Added support for creating all-purpose clusters (#1698) ## Changes Added support for creating all-purpose clusters Example of configuration ``` bundle: name: clusters resources: clusters: test_cluster: cluster_name: "Test Cluster" num_workers: 2 node_type_id: "i3.xlarge" autoscale: min_workers: 2 max_workers: 7 spark_version: "13.3.x-scala2.12" spark_conf: "spark.executor.memory": "2g" jobs: test_job: name: "Test Job" tasks: - task_key: test_task existing_cluster_id: ${resources.clusters.test_cluster.id} notebook_task: notebook_path: "./src/test.py" targets: development: mode: development compute_id: ${resources.clusters.test_cluster.id} ``` ## Tests Added unit, config and E2E tests --- bundle/config/bundle.go | 7 +- bundle/config/mutator/apply_presets.go | 15 +++ bundle/config/mutator/compute_id_compat.go | 87 +++++++++++++++++ .../config/mutator/compute_id_compate_test.go | 57 +++++++++++ bundle/config/mutator/mutator.go | 1 + bundle/config/mutator/override_compute.go | 8 +- .../config/mutator/override_compute_test.go | 4 +- .../mutator/process_target_mode_test.go | 10 ++ bundle/config/mutator/run_as_test.go | 2 + bundle/config/resources.go | 1 + bundle/config/resources/clusters.go | 39 ++++++++ bundle/config/root.go | 6 +- bundle/config/target.go | 7 +- bundle/deploy/terraform/convert.go | 22 +++++ bundle/deploy/terraform/convert_test.go | 56 +++++++++++ bundle/deploy/terraform/interpolate.go | 2 + bundle/deploy/terraform/interpolate_test.go | 2 + .../deploy/terraform/tfdyn/convert_cluster.go | 52 ++++++++++ .../terraform/tfdyn/convert_cluster_test.go | 97 +++++++++++++++++++ bundle/tests/clusters/databricks.yml | 36 +++++++ bundle/tests/clusters_test.go | 36 +++++++ cmd/bundle/deploy.go | 11 ++- .../clusters/databricks_template_schema.json | 16 +++ .../clusters/template/databricks.yml.tmpl | 24 +++++ .../bundles/clusters/template/hello_world.py | 1 + internal/bundle/clusters_test.go | 56 +++++++++++ internal/testutil/cloud.go | 4 + 27 files changed, 643 insertions(+), 16 deletions(-) create mode 100644 bundle/config/mutator/compute_id_compat.go create mode 100644 bundle/config/mutator/compute_id_compate_test.go create mode 100644 bundle/config/resources/clusters.go create mode 100644 bundle/deploy/terraform/tfdyn/convert_cluster.go create mode 100644 bundle/deploy/terraform/tfdyn/convert_cluster_test.go create mode 100644 bundle/tests/clusters/databricks.yml create mode 100644 bundle/tests/clusters_test.go create mode 100644 internal/bundle/bundles/clusters/databricks_template_schema.json create mode 100644 internal/bundle/bundles/clusters/template/databricks.yml.tmpl create mode 100644 internal/bundle/bundles/clusters/template/hello_world.py create mode 100644 internal/bundle/clusters_test.go diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 78648dfd..f533c4d1 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -38,8 +38,11 @@ type Bundle struct { // Annotated readonly as this should be set at the target level. Mode Mode `json:"mode,omitempty" bundle:"readonly"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. + ClusterId string `json:"cluster_id,omitempty"` // Deployment section specifies deployment related configuration for bundle Deployment Deployment `json:"deployment,omitempty"` diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 28d015c1..27af82e5 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -160,6 +160,21 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // the Databricks UI and via the SQL API. } + // Clusters: Prefix, Tags + for _, c := range r.Clusters { + c.ClusterName = prefix + c.ClusterName + if c.CustomTags == nil { + c.CustomTags = make(map[string]string) + } + for _, tag := range tags { + normalisedKey := b.Tagging.NormalizeKey(tag.Key) + normalisedValue := b.Tagging.NormalizeValue(tag.Value) + if _, ok := c.CustomTags[normalisedKey]; !ok { + c.CustomTags[normalisedKey] = normalisedValue + } + } + } + return nil } diff --git a/bundle/config/mutator/compute_id_compat.go b/bundle/config/mutator/compute_id_compat.go new file mode 100644 index 00000000..3afe02e9 --- /dev/null +++ b/bundle/config/mutator/compute_id_compat.go @@ -0,0 +1,87 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type computeIdToClusterId struct{} + +func ComputeIdToClusterId() bundle.Mutator { + return &computeIdToClusterId{} +} + +func (m *computeIdToClusterId) Name() string { + return "ComputeIdToClusterId" +} + +func (m *computeIdToClusterId) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // The "compute_id" key is set; rewrite it to "cluster_id". + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.NewPath(dyn.Key("bundle"))) + diags = diags.Extend(d) + + // Check if the "compute_id" key is set in any target overrides. + return dyn.MapByPattern(v, dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.Path{}) + diags = diags.Extend(d) + return v, nil + }) + }) + + diags = diags.Extend(diag.FromErr(err)) + return diags +} + +func rewriteComputeIdToClusterId(v dyn.Value, p dyn.Path) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + computeIdPath := p.Append(dyn.Key("compute_id")) + computeId, err := dyn.GetByPath(v, computeIdPath) + + // If the "compute_id" key is not set, we don't need to do anything. + if err != nil { + return v, nil + } + + if computeId.Kind() == dyn.KindInvalid { + return v, nil + } + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: "compute_id is deprecated, please use cluster_id instead", + Locations: computeId.Locations(), + Paths: []dyn.Path{computeIdPath}, + }) + + clusterIdPath := p.Append(dyn.Key("cluster_id")) + nv, err := dyn.SetByPath(v, clusterIdPath, computeId) + if err != nil { + return dyn.InvalidValue, diag.FromErr(err) + } + // Drop the "compute_id" key. + vout, err := dyn.Walk(nv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + switch len(p) { + case 0: + return v, nil + case 1: + if p[0] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + return v, nil + case 2: + if p[1] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + } + return v, dyn.ErrSkip + }) + + diags = diags.Extend(diag.FromErr(err)) + return vout, diags +} diff --git a/bundle/config/mutator/compute_id_compate_test.go b/bundle/config/mutator/compute_id_compate_test.go new file mode 100644 index 00000000..e59d37e3 --- /dev/null +++ b/bundle/config/mutator/compute_id_compate_test.go @@ -0,0 +1,57 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/assert" +) + +func TestComputeIdToClusterId(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + ComputeId: "compute-id", + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "compute-id", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} + +func TestComputeIdToClusterIdInTargetOverride(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Targets: map[string]*config.Target{ + "dev": { + ComputeId: "compute-id-dev", + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Empty(t, b.Config.Targets["dev"].ComputeId) + + diags = diags.Extend(bundle.Apply(context.Background(), b, mutator.SelectTarget("dev"))) + assert.NoError(t, diags.Error()) + + assert.Equal(t, "compute-id-dev", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 0458beff..faf50ae6 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -23,6 +23,7 @@ func DefaultMutators() []bundle.Mutator { VerifyCliVersion(), EnvironmentsToTargets(), + ComputeIdToClusterId(), InitializeVariables(), DefineDefaultTarget(), LoadGitDetails(), diff --git a/bundle/config/mutator/override_compute.go b/bundle/config/mutator/override_compute.go index 73fbad36..5700cdf2 100644 --- a/bundle/config/mutator/override_compute.go +++ b/bundle/config/mutator/override_compute.go @@ -39,22 +39,22 @@ func overrideJobCompute(j *resources.Job, compute string) { func (m *overrideCompute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { if b.Config.Bundle.Mode != config.Development { - if b.Config.Bundle.ComputeID != "" { + if b.Config.Bundle.ClusterId != "" { return diag.Errorf("cannot override compute for an target that does not use 'mode: development'") } return nil } if v := env.Get(ctx, "DATABRICKS_CLUSTER_ID"); v != "" { - b.Config.Bundle.ComputeID = v + b.Config.Bundle.ClusterId = v } - if b.Config.Bundle.ComputeID == "" { + if b.Config.Bundle.ClusterId == "" { return nil } r := b.Config.Resources for i := range r.Jobs { - overrideJobCompute(r.Jobs[i], b.Config.Bundle.ComputeID) + overrideJobCompute(r.Jobs[i], b.Config.Bundle.ClusterId) } return nil diff --git a/bundle/config/mutator/override_compute_test.go b/bundle/config/mutator/override_compute_test.go index 152ee543..369447d7 100644 --- a/bundle/config/mutator/override_compute_test.go +++ b/bundle/config/mutator/override_compute_test.go @@ -20,7 +20,7 @@ func TestOverrideDevelopment(t *testing.T) { Config: config.Root{ Bundle: config.Bundle{ Mode: config.Development, - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -144,7 +144,7 @@ func TestOverrideProduction(t *testing.T) { b := &bundle.Bundle{ Config: config.Root{ Bundle: config.Bundle{ - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 42f1929c..b0eb57ee 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/libs/tags" sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/ml" @@ -119,6 +120,9 @@ func mockBundle(mode config.Mode) *bundle.Bundle { Schemas: map[string]*resources.Schema{ "schema1": {CreateSchema: &catalog.CreateSchema{Name: "schema1"}}, }, + Clusters: map[string]*resources.Cluster{ + "cluster1": {ClusterSpec: &compute.ClusterSpec{ClusterName: "cluster1", SparkVersion: "13.2.x", NumWorkers: 1}}, + }, }, }, // Use AWS implementation for testing. @@ -177,6 +181,9 @@ func TestProcessTargetModeDevelopment(t *testing.T) { // Schema 1 assert.Equal(t, "dev_lennart_schema1", b.Config.Resources.Schemas["schema1"].Name) + + // Clusters + assert.Equal(t, "[dev lennart] cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeDevelopmentTagNormalizationForAws(t *testing.T) { @@ -281,6 +288,7 @@ func TestProcessTargetModeDefault(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProduction(t *testing.T) { @@ -312,6 +320,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Resources.Experiments["experiment2"].Permissions = permissions b.Config.Resources.Models["model1"].Permissions = permissions b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Permissions = permissions + b.Config.Resources.Clusters["cluster1"].Permissions = permissions diags = validateProductionMode(context.Background(), b, false) require.NoError(t, diags.Error()) @@ -322,6 +331,7 @@ func TestProcessTargetModeProduction(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index e6cef9ba..abeea45d 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -32,6 +32,7 @@ func allResourceTypes(t *testing.T) []string { // the dyn library gives us the correct list of all resources supported. Please // also update this check when adding a new resource require.Equal(t, []string{ + "clusters", "experiments", "jobs", "model_serving_endpoints", @@ -133,6 +134,7 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { // some point in the future. These resources are (implicitly) on the deny list, since // they are not on the allow list below. allowList := []string{ + "clusters", "jobs", "models", "registered_models", diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 22d69ffb..a3afb7fc 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -19,6 +19,7 @@ type Resources struct { RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"` QualityMonitors map[string]*resources.QualityMonitor `json:"quality_monitors,omitempty"` Schemas map[string]*resources.Schema `json:"schemas,omitempty"` + Clusters map[string]*resources.Cluster `json:"clusters,omitempty"` } type ConfigResource interface { diff --git a/bundle/config/resources/clusters.go b/bundle/config/resources/clusters.go new file mode 100644 index 00000000..63234566 --- /dev/null +++ b/bundle/config/resources/clusters.go @@ -0,0 +1,39 @@ +package resources + +import ( + "context" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +type Cluster struct { + ID string `json:"id,omitempty" bundle:"readonly"` + Permissions []Permission `json:"permissions,omitempty"` + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` + + *compute.ClusterSpec +} + +func (s *Cluster) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, s) +} + +func (s Cluster) MarshalJSON() ([]byte, error) { + return marshal.Marshal(s) +} + +func (s *Cluster) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.Clusters.GetByClusterId(ctx, id) + if err != nil { + log.Debugf(ctx, "cluster %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *Cluster) TerraformResourceName() string { + return "databricks_cluster" +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 884c2e1c..92d834f0 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -366,9 +366,9 @@ func (r *Root) MergeTargetOverrides(name string) error { } } - // Merge `compute_id`. This field must be overwritten if set, not merged. - if v := target.Get("compute_id"); v.Kind() != dyn.KindInvalid { - root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("compute_id")), v) + // Merge `cluster_id`. This field must be overwritten if set, not merged. + if v := target.Get("cluster_id"); v.Kind() != dyn.KindInvalid { + root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("cluster_id")), v) if err != nil { return err } diff --git a/bundle/config/target.go b/bundle/config/target.go index fc6ba7b5..fae9c940 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -24,8 +24,11 @@ type Target struct { // name prefix of deployed resources. Presets Presets `json:"presets,omitempty"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. + ClusterId string `json:"cluster_id,omitempty"` Bundle *Bundle `json:"bundle,omitempty"` diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index f13c241c..5a548e3b 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -231,6 +231,13 @@ func BundleToTerraform(config *config.Root) *schema.Root { tfroot.Resource.QualityMonitor[k] = &dst } + for k, src := range config.Resources.Clusters { + noResources = false + var dst schema.ResourceCluster + conv(src, &dst) + tfroot.Resource.Cluster[k] = &dst + } + // We explicitly set "resource" to nil to omit it from a JSON encoding. // This is required because the terraform CLI requires >= 1 resources defined // if the "resource" property is used in a .tf.json file. @@ -394,6 +401,16 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.Schemas[resource.Name] = cur + case "databricks_cluster": + if config.Resources.Clusters == nil { + config.Resources.Clusters = make(map[string]*resources.Cluster) + } + cur := config.Resources.Clusters[resource.Name] + if cur == nil { + cur = &resources.Cluster{ModifiedStatus: resources.ModifiedStatusDeleted} + } + cur.ID = instance.Attributes.ID + config.Resources.Clusters[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. @@ -443,6 +460,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Clusters { + if src.ModifiedStatus == "" && src.ID == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index e4ef6114..4c6866d9 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -663,6 +663,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -692,6 +700,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -754,6 +765,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + }, }, } var tfState = resourcesState{ @@ -786,6 +804,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -888,6 +909,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + "test_cluster_new": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster_new", + }, + }, + }, }, } var tfState = resourcesState{ @@ -1020,6 +1053,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "2"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "2"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1081,6 +1130,13 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema_new"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ModifiedStatus) + assert.Equal(t, "2", config.Resources.Clusters["test_cluster_old"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster_old"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster_new"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index faa098e1..12894c68 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -58,6 +58,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_quality_monitor")).Append(path[2:]...) case dyn.Key("schemas"): path = dyn.NewPath(dyn.Key("databricks_schema")).Append(path[2:]...) + case dyn.Key("clusters"): + path = dyn.NewPath(dyn.Key("databricks_cluster")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index 5ceb243b..630a904a 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -31,6 +31,7 @@ func TestInterpolate(t *testing.T) { "other_model_serving": "${resources.model_serving_endpoints.other_model_serving.id}", "other_registered_model": "${resources.registered_models.other_registered_model.id}", "other_schema": "${resources.schemas.other_schema.id}", + "other_cluster": "${resources.clusters.other_cluster.id}", }, Tasks: []jobs.Task{ { @@ -67,6 +68,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_model_serving.other_model_serving.id}", j.Tags["other_model_serving"]) assert.Equal(t, "${databricks_registered_model.other_registered_model.id}", j.Tags["other_registered_model"]) assert.Equal(t, "${databricks_schema.other_schema.id}", j.Tags["other_schema"]) + assert.Equal(t, "${databricks_cluster.other_cluster.id}", j.Tags["other_cluster"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster.go b/bundle/deploy/terraform/tfdyn/convert_cluster.go new file mode 100644 index 00000000..f25f09ea --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster.go @@ -0,0 +1,52 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +func convertClusterResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Normalize the output value to the target schema. + vout, diags := convert.Normalize(compute.ClusterSpec{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "cluster normalization diagnostic: %s", diag.Summary) + } + + return vout, nil +} + +type clusterConverter struct{} + +func (clusterConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertClusterResource(ctx, vin) + if err != nil { + return err + } + + // We always set no_wait as it allows DABs not to wait for cluster to be started. + vout, err = dyn.Set(vout, "no_wait", dyn.V(true)) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.Cluster[key] = vout.AsAny() + + // Configure permissions for this resource. + if permissions := convertPermissionsResource(ctx, vin); permissions != nil { + permissions.JobId = fmt.Sprintf("${databricks_cluster.%s.id}", key) + out.Permissions["cluster_"+key] = permissions + } + + return nil +} + +func init() { + registerConverter("clusters", clusterConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster_test.go b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go new file mode 100644 index 00000000..e7d2542f --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go @@ -0,0 +1,97 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertCluster(t *testing.T) { + var src = resources.Cluster{ + ClusterSpec: &compute.ClusterSpec{ + NumWorkers: 3, + SparkVersion: "13.3.x-scala2.12", + ClusterName: "cluster", + SparkConf: map[string]string{ + "spark.executor.memory": "2g", + }, + AwsAttributes: &compute.AwsAttributes{ + Availability: "ON_DEMAND", + }, + AzureAttributes: &compute.AzureAttributes{ + Availability: "SPOT", + }, + DataSecurityMode: "USER_ISOLATION", + NodeTypeId: "m5.xlarge", + Autoscale: &compute.AutoScale{ + MinWorkers: 1, + MaxWorkers: 10, + }, + }, + + Permissions: []resources.Permission{ + { + Level: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + Level: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = clusterConverter{}.Convert(ctx, "my_cluster", vin, out) + require.NoError(t, err) + + cluster := out.Cluster["my_cluster"] + assert.Equal(t, map[string]any{ + "num_workers": int64(3), + "spark_version": "13.3.x-scala2.12", + "cluster_name": "cluster", + "spark_conf": map[string]any{ + "spark.executor.memory": "2g", + }, + "aws_attributes": map[string]any{ + "availability": "ON_DEMAND", + }, + "azure_attributes": map[string]any{ + "availability": "SPOT", + }, + "data_security_mode": "USER_ISOLATION", + "no_wait": true, + "node_type_id": "m5.xlarge", + "autoscale": map[string]any{ + "min_workers": int64(1), + "max_workers": int64(10), + }, + }, cluster) + + // Assert equality on the permissions + assert.Equal(t, &schema.ResourcePermissions{ + JobId: "${databricks_cluster.my_cluster.id}", + AccessControl: []schema.ResourcePermissionsAccessControl{ + { + PermissionLevel: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + PermissionLevel: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + }, out.Permissions["cluster_my_cluster"]) + +} diff --git a/bundle/tests/clusters/databricks.yml b/bundle/tests/clusters/databricks.yml new file mode 100644 index 00000000..1074462a --- /dev/null +++ b/bundle/tests/clusters/databricks.yml @@ -0,0 +1,36 @@ +bundle: + name: clusters + +workspace: + host: https://acme.cloud.databricks.com/ + +resources: + clusters: + foo: + cluster_name: foo + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + +targets: + default: + + development: + resources: + clusters: + foo: + cluster_name: foo-override + num_workers: 3 + node_type_id: "m5.xlarge" + autoscale: + min_workers: 1 + max_workers: 3 + spark_version: "15.2.x-scala2.12" + spark_conf: + "spark.executor.memory": "4g" + "spark.executor.memory2": "4g" diff --git a/bundle/tests/clusters_test.go b/bundle/tests/clusters_test.go new file mode 100644 index 00000000..def8a2a3 --- /dev/null +++ b/bundle/tests/clusters_test.go @@ -0,0 +1,36 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestClusters(t *testing.T) { + b := load(t, "./clusters") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo", cluster.ClusterName) + assert.Equal(t, "13.3.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "i3.xlarge", cluster.NodeTypeId) + assert.Equal(t, 2, cluster.NumWorkers) + assert.Equal(t, "2g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, 2, cluster.Autoscale.MinWorkers) + assert.Equal(t, 7, cluster.Autoscale.MaxWorkers) +} + +func TestClustersOverride(t *testing.T) { + b := loadTarget(t, "./clusters", "development") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo-override", cluster.ClusterName) + assert.Equal(t, "15.2.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "m5.xlarge", cluster.NodeTypeId) + assert.Equal(t, 3, cluster.NumWorkers) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory2"]) + assert.Equal(t, 1, cluster.Autoscale.MinWorkers) + assert.Equal(t, 3, cluster.Autoscale.MaxWorkers) +} diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 49231734..f1c85cb3 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -24,14 +24,16 @@ func newDeployCommand() *cobra.Command { var force bool var forceLock bool var failOnActiveRuns bool - var computeID string + var clusterId string var autoApprove bool var verbose bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") - cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") + cmd.Flags().StringVar(&clusterId, "compute-id", "", "Override cluster in the deployment with the given compute ID.") + cmd.Flags().StringVarP(&clusterId, "cluster-id", "c", "", "Override cluster in the deployment with the given cluster ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") + cmd.Flags().MarkDeprecated("compute-id", "use --cluster-id instead") cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") // Verbose flag currently only affects file sync output, it's used by the vscode extension cmd.Flags().MarkHidden("verbose") @@ -47,7 +49,10 @@ func newDeployCommand() *cobra.Command { b.AutoApprove = autoApprove if cmd.Flag("compute-id").Changed { - b.Config.Bundle.ComputeID = computeID + b.Config.Bundle.ClusterId = clusterId + } + if cmd.Flag("cluster-id").Changed { + b.Config.Bundle.ClusterId = clusterId } if cmd.Flag("fail-on-active-runs").Changed { b.Config.Bundle.Deployment.FailOnActiveRuns = failOnActiveRuns diff --git a/internal/bundle/bundles/clusters/databricks_template_schema.json b/internal/bundle/bundles/clusters/databricks_template_schema.json new file mode 100644 index 00000000..c1c5cf12 --- /dev/null +++ b/internal/bundle/bundles/clusters/databricks_template_schema.json @@ -0,0 +1,16 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + } + } +} diff --git a/internal/bundle/bundles/clusters/template/databricks.yml.tmpl b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl new file mode 100644 index 00000000..e0d6320a --- /dev/null +++ b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl @@ -0,0 +1,24 @@ +bundle: + name: basic + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + clusters: + test_cluster: + cluster_name: "test-cluster-{{.unique_id}}" + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + num_workers: 2 + spark_conf: + "spark.executor.memory": "2g" + + jobs: + foo: + name: test-job-with-cluster-{{.unique_id}} + tasks: + - task_key: my_notebook_task + existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}" + spark_python_task: + python_file: ./hello_world.py diff --git a/internal/bundle/bundles/clusters/template/hello_world.py b/internal/bundle/bundles/clusters/template/hello_world.py new file mode 100644 index 00000000..f301245e --- /dev/null +++ b/internal/bundle/bundles/clusters/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/internal/bundle/clusters_test.go b/internal/bundle/clusters_test.go new file mode 100644 index 00000000..a961f3ea --- /dev/null +++ b/internal/bundle/clusters_test.go @@ -0,0 +1,56 @@ +package bundle + +import ( + "fmt" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestAccDeployBundleWithCluster(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + if testutil.IsAWSCloud(wt.T) { + t.Skip("Skipping test for AWS cloud because it is not permitted to create clusters") + } + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + root, err := initTestTemplate(t, ctx, "clusters", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + t.Cleanup(func() { + err = destroyBundle(t, ctx, root) + require.NoError(t, err) + + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + if err != nil { + require.ErrorContains(t, err, "does not exist") + } else { + require.Contains(t, []compute.State{compute.StateTerminated, compute.StateTerminating}, cluster.State) + } + + }) + + err = deployBundle(t, ctx, root) + require.NoError(t, err) + + // Cluster should exists after bundle deployment + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + require.NoError(t, err) + require.NotNil(t, cluster) + + out, err := runResource(t, ctx, root, "foo") + require.NoError(t, err) + require.Contains(t, out, "Hello World!") +} diff --git a/internal/testutil/cloud.go b/internal/testutil/cloud.go index e547069f..ba5b75ec 100644 --- a/internal/testutil/cloud.go +++ b/internal/testutil/cloud.go @@ -49,3 +49,7 @@ func GetCloud(t *testing.T) Cloud { } return -1 } + +func IsAWSCloud(t *testing.T) bool { + return GetCloud(t) == AWS +} From 0cc35ca05693e5989308f432f22bb0a28f8cb1dd Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 23 Sep 2024 18:12:30 +0530 Subject: [PATCH 5/5] Assert tokens are redacted in origin URL when username is not specified (#1785) TSIA --- libs/git/repository_test.go | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go index a28038ee..93d9a03d 100644 --- a/libs/git/repository_test.go +++ b/libs/git/repository_test.go @@ -209,7 +209,26 @@ func TestRepositoryGitConfigWhenNotARepo(t *testing.T) { } func TestRepositoryOriginUrlRemovesUserCreds(t *testing.T) { - repo := newTestRepository(t) - repo.addOriginUrl("https://username:token@github.com/databricks/foobar.git") - repo.assertOriginUrl("https://github.com/databricks/foobar.git") + tcases := []struct { + url string + expected string + }{ + { + url: "https://username:token@github.com/databricks/foobar.git", + expected: "https://github.com/databricks/foobar.git", + }, + { + // Note: The token is still considered and parsed as a username here. + // However credentials integrations by Git providers like GitHub + // allow for setting a PAT token as a username. + url: "https://token@github.com/databricks/foobar.git", + expected: "https://github.com/databricks/foobar.git", + }, + } + + for _, tc := range tcases { + repo := newTestRepository(t) + repo.addOriginUrl(tc.url) + repo.assertOriginUrl(tc.expected) + } }