diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go
index 62d1ae73..008d7b90 100644
--- a/bundle/config/experimental.go
+++ b/bundle/config/experimental.go
@@ -10,6 +10,19 @@ type Experimental struct {
 	// In this case the configured wheel task will be deployed as a notebook task which installs the defined wheel at runtime and executes it.
 	// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
 	PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
+
+	// Enable legacy run_as behavior. That is:
+	// - Set the run_as identity as the owner of any pipelines in the bundle.
+	// - Do not error in the presence of resources that do not support run_as.
+	//   As of April 2024 this includes pipelines and model serving endpoints.
+	//
+	// This mode of run_as requires the deploying user to be a workspace and metastore
+	// admin. Use of this flag is not recommended for new bundles; it is only provided
+	// to unblock customers that are stuck due to breaking changes in the run_as behavior
+	// made in https://github.com/databricks/cli/pull/1233. This flag might
+	// be removed in the future once we have a proper workaround, like allowing IS_OWNER
+	// as a top-level permission in the DAB.
+	UseLegacyRunAs bool `json:"use_legacy_run_as,omitempty"`
 }
 
 type Command string
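The new field follows the same JSON-tag wiring as the existing `PythonWheelWrapper` flag, so an `experimental` block in the bundle configuration maps onto it directly. Below is a minimal, self-contained sketch of that mapping using a trimmed stand-in for the `Experimental` struct; the CLI's real loader goes through its own `dyn` machinery rather than `encoding/json`, so this is illustrative only:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Experimental is a trimmed stand-in for the struct in bundle/config/experimental.go.
type Experimental struct {
	PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
	UseLegacyRunAs     bool `json:"use_legacy_run_as,omitempty"`
}

func main() {
	// JSON equivalent of the YAML block:
	//   experimental:
	//     use_legacy_run_as: true
	raw := []byte(`{"use_legacy_run_as": true}`)

	var exp Experimental
	if err := json.Unmarshal(raw, &exp); err != nil {
		panic(err)
	}
	fmt.Println(exp.UseLegacyRunAs) // true
}
```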
diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go
index 8da233c2..c5b294b2 100644
--- a/bundle/config/mutator/run_as.go
+++ b/bundle/config/mutator/run_as.go
@@ -3,8 +3,10 @@ package mutator
 import (
 	"context"
 	"fmt"
+	"slices"
 
 	"github.com/databricks/cli/bundle"
+	"github.com/databricks/cli/bundle/config/resources"
 	"github.com/databricks/cli/libs/diag"
 	"github.com/databricks/cli/libs/dyn"
 	"github.com/databricks/databricks-sdk-go/service/jobs"
@@ -101,19 +103,12 @@ func validateRunAs(b *bundle.Bundle) error {
 	return nil
 }
 
-func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
-	// Mutator is a no-op if run_as is not specified in the bundle
+func setRunAsForJobs(b *bundle.Bundle) {
 	runAs := b.Config.RunAs
 	if runAs == nil {
-		return nil
+		return
 	}
 
-	// Assert the run_as configuration is valid in the context of the bundle
-	if err := validateRunAs(b); err != nil {
-		return diag.FromErr(err)
-	}
-
-	// Set run_as for jobs
 	for i := range b.Config.Resources.Jobs {
 		job := b.Config.Resources.Jobs[i]
 		if job.RunAs != nil {
@@ -124,6 +119,63 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
 			UserName: runAs.UserName,
 		}
 	}
+}
+
+// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_legacy_run_as flag.
+// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233
+func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) {
+	runAs := b.Config.RunAs
+	if runAs == nil {
+		return
+	}
+
+	me := b.Config.Workspace.CurrentUser.UserName
+	// If the user deploying the bundle and the one defined in run_as are the same,
+	// do not add the IS_OWNER permission. The current user is implied to be an owner in this case.
+	// Otherwise, the deployment will fail due to https://github.com/databricks/terraform-provider-databricks/issues/2407
+	if runAs.UserName == me || runAs.ServicePrincipalName == me {
+		return
+	}
+
+	for i := range b.Config.Resources.Pipelines {
+		pipeline := b.Config.Resources.Pipelines[i]
+		pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
+			return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
+				(runAs.UserName != "" && p.UserName == runAs.UserName)
+		})
+		pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
+			Level:                "IS_OWNER",
+			ServicePrincipalName: runAs.ServicePrincipalName,
+			UserName:             runAs.UserName,
+		})
+	}
+}
+
+func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
+	// Mutator is a no-op if run_as is not specified in the bundle
+	runAs := b.Config.RunAs
+	if runAs == nil {
+		return nil
+	}
+
+	if b.Config.Experimental != nil && b.Config.Experimental.UseLegacyRunAs {
+		setPipelineOwnersToRunAsIdentity(b)
+		setRunAsForJobs(b)
+		return diag.Diagnostics{
+			{
+				Severity: diag.Warning,
+				Summary:  "You are using the legacy mode of run_as. Support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user, this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.",
+				Path:     dyn.MustPathFromString("experimental.use_legacy_run_as"),
+				Location: b.Config.GetLocation("experimental.use_legacy_run_as"),
+			},
+		}
+	}
+
+	// Assert the run_as configuration is valid in the context of the bundle
+	if err := validateRunAs(b); err != nil {
+		return diag.FromErr(err)
+	}
+
+	setRunAsForJobs(b)
 	return nil
 }
diff --git a/bundle/tests/run_as/legacy/databricks.yml b/bundle/tests/run_as/legacy/databricks.yml
new file mode 100644
index 00000000..e47224db
--- /dev/null
+++ b/bundle/tests/run_as/legacy/databricks.yml
@@ -0,0 +1,68 @@
+bundle:
+  name: "run_as"
+
+run_as:
+  service_principal_name: "my_service_principal"
+
+experimental:
+  use_legacy_run_as: true
+
+resources:
+  jobs:
+    job_one:
+      name: Job One
+
+      tasks:
+        - task_key: "task_one"
+          notebook_task:
+            notebook_path: "./test.py"
+
+    job_two:
+      name: Job Two
+
+      tasks:
+        - task_key: "task_two"
+          notebook_task:
+            notebook_path: "./test.py"
+
+    job_three:
+      name: Job Three
+
+      run_as:
+        service_principal_name: "my_service_principal_for_job"
+
+      tasks:
+        - task_key: "task_three"
+          notebook_task:
+            notebook_path: "./test.py"
+
+  pipelines:
+    nyc_taxi_pipeline:
+      name: "nyc taxi loader"
+
+      permissions:
+        - level: CAN_VIEW
+          service_principal_name: my_service_principal
+        - level: CAN_VIEW
+          user_name: my_user_name
+
+      libraries:
+        - notebook:
+            path: ./dlt/nyc_taxi_loader
+
+
+  models:
+    model_one:
+      name: "skynet"
+
+  registered_models:
+    model_two:
+      name: "skynet (in UC)"
+
+  experiments:
+    experiment_one:
+      name: "experiment_one"
+
+  model_serving_endpoints:
+    model_serving_one:
+      name: "skynet"
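For the `nyc_taxi_pipeline` above, `setPipelineOwnersToRunAsIdentity` should strip the run_as principal's existing `CAN_VIEW` grant and append `IS_OWNER`, leaving exactly two permissions. Here is a minimal, self-contained sketch of that filter-then-append step; `Permission` is a local stand-in for `resources.Permission`:

```go
package main

import (
	"fmt"
	"slices"
)

// Permission is a local stand-in for resources.Permission from the diff.
type Permission struct {
	Level                string
	UserName             string
	ServicePrincipalName string
}

func main() {
	runAs := Permission{ServicePrincipalName: "my_service_principal"}

	// Grants as declared on nyc_taxi_pipeline in the test bundle.
	perms := []Permission{
		{Level: "CAN_VIEW", ServicePrincipalName: "my_service_principal"},
		{Level: "CAN_VIEW", UserName: "my_user_name"},
	}

	// Drop any existing grant for the run_as identity so it is not duplicated...
	perms = slices.DeleteFunc(perms, func(p Permission) bool {
		return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
			(runAs.UserName != "" && p.UserName == runAs.UserName)
	})

	// ...then re-add it as the owner, mirroring the mutator's append.
	perms = append(perms, Permission{Level: "IS_OWNER", ServicePrincipalName: runAs.ServicePrincipalName})

	for _, p := range perms {
		fmt.Printf("%s user=%q sp=%q\n", p.Level, p.UserName, p.ServicePrincipalName)
	}
	// Output:
	// CAN_VIEW user="my_user_name" sp=""
	// IS_OWNER user="" sp="my_service_principal"
}
```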
"github.com/databricks/databricks-sdk-go/service/serving" "github.com/stretchr/testify/assert" ) @@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) { configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml") assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath)) } + +func TestLegacyRunAs(t *testing.T) { + b := load(t, "./run_as/legacy") + + ctx := context.Background() + bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + b.Config.Workspace.CurrentUser = &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, + } + return nil + }) + + diags := bundle.Apply(ctx, b, mutator.SetRunAs()) + assert.NoError(t, diags.Error()) + + assert.Len(t, b.Config.Resources.Jobs, 3) + jobs := b.Config.Resources.Jobs + + // job_one and job_two should have the same run_as identity as the bundle. + assert.NotNil(t, jobs["job_one"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_one"].RunAs.UserName) + + assert.NotNil(t, jobs["job_two"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_two"].RunAs.UserName) + + // job_three should retain it's run_as identity. + assert.NotNil(t, jobs["job_three"].RunAs) + assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_three"].RunAs.UserName) + + // Assert owner permissions for pipelines are set. + pipelines := b.Config.Resources.Pipelines + assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2) + + assert.Equal(t, "CAN_VIEW", pipelines["nyc_taxi_pipeline"].Permissions[0].Level) + assert.Equal(t, "my_user_name", pipelines["nyc_taxi_pipeline"].Permissions[0].UserName) + + assert.Equal(t, "IS_OWNER", pipelines["nyc_taxi_pipeline"].Permissions[1].Level) + assert.Equal(t, "my_service_principal", pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName) + + // Assert other resources are not affected. + assert.Equal(t, ml.Model{Name: "skynet"}, *b.Config.Resources.Models["model_one"].Model) + assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *b.Config.Resources.RegisteredModels["model_two"].CreateRegisteredModelRequest) + assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *b.Config.Resources.Experiments["experiment_one"].Experiment) + assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *b.Config.Resources.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint) +}