mirror of https://github.com/databricks/cli.git
Add legacy option for `run_as` (#1384)
## Changes This PR partially reverts the changes in https://github.com/databricks/cli/pull/1233 and puts the old code under an "experimental.use_legacy_run_as" configuration. This gives customers who ran into the breaking change made in the PR a way out. ## Tests Both manually and via unit tests. Manually verified that run_as works for pipelines now. And if a user wants to use the feature they need to be both a Metastore and a workspace admin. --------- Error when the deploying user is a workspace admin but not a metastore admin: ``` Error: terraform apply: exit status 1 Error: cannot update permissions: User is not a metastore admin for Metastore 'deco-uc-prod-aws-us-east-1'. with databricks_permissions.pipeline_foo, on bundle.tf.json line 23, in resource.databricks_permissions.pipeline_foo: 23: } ``` -------- Output of bundle validate: ``` ➜ bundle-playground git:(master) ✗ cli bundle validate Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:13:22 Name: bundle-playground Target: default Workspace: Host: https://dbc-a39a1eb1-ef95.cloud.databricks.com User: shreyas.goenka@databricks.com Path: /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default Found 1 warning ```
This commit is contained in:
parent
3108883a8f
commit
1d9bf4b2c4
|
@ -10,6 +10,19 @@ type Experimental struct {
|
|||
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
|
||||
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
|
||||
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
|
||||
|
||||
// Enable legacy run_as behavior. That is:
|
||||
// - Set the run_as identity as the owner of any pipelines in the bundle.
|
||||
// - Do not error in the presence of resources that do not support run_as.
|
||||
// As of April 2024 this includes pipelines and model serving endpoints.
|
||||
//
|
||||
// This mode of run_as requires the deploying user to be a workspace and metastore
|
||||
// admin. Use of this flag is not recommended for new bundles, and it is only provided
|
||||
// to unblock customers that are stuck due to breaking changes in the run_as behavior
|
||||
// made in https://github.com/databricks/cli/pull/1233. This flag might
|
||||
// be removed in the future once we have a proper workaround like allowing IS_OWNER
|
||||
// as a top-level permission in the DAB.
|
||||
UseLegacyRunAs bool `json:"use_legacy_run_as,omitempty"`
|
||||
}
|
||||
|
||||
type Command string
|
||||
|
|
|
@ -3,8 +3,10 @@ package mutator
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/config/resources"
|
||||
"github.com/databricks/cli/libs/diag"
|
||||
"github.com/databricks/cli/libs/dyn"
|
||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||
|
@ -101,19 +103,12 @@ func validateRunAs(b *bundle.Bundle) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||
// Mutator is a no-op if run_as is not specified in the bundle
|
||||
func setRunAsForJobs(b *bundle.Bundle) {
|
||||
runAs := b.Config.RunAs
|
||||
if runAs == nil {
|
||||
return nil
|
||||
return
|
||||
}
|
||||
|
||||
// Assert the run_as configuration is valid in the context of the bundle
|
||||
if err := validateRunAs(b); err != nil {
|
||||
return diag.FromErr(err)
|
||||
}
|
||||
|
||||
// Set run_as for jobs
|
||||
for i := range b.Config.Resources.Jobs {
|
||||
job := b.Config.Resources.Jobs[i]
|
||||
if job.RunAs != nil {
|
||||
|
@ -124,6 +119,63 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
|||
UserName: runAs.UserName,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_run_as_legacy flag.
|
||||
// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233
|
||||
func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) {
|
||||
runAs := b.Config.RunAs
|
||||
if runAs == nil {
|
||||
return
|
||||
}
|
||||
|
||||
me := b.Config.Workspace.CurrentUser.UserName
|
||||
// If user deploying the bundle and the one defined in run_as are the same
|
||||
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
|
||||
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
|
||||
if runAs.UserName == me || runAs.ServicePrincipalName == me {
|
||||
return
|
||||
}
|
||||
|
||||
for i := range b.Config.Resources.Pipelines {
|
||||
pipeline := b.Config.Resources.Pipelines[i]
|
||||
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
|
||||
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
|
||||
(runAs.UserName != "" && p.UserName == runAs.UserName)
|
||||
})
|
||||
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
|
||||
Level: "IS_OWNER",
|
||||
ServicePrincipalName: runAs.ServicePrincipalName,
|
||||
UserName: runAs.UserName,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||
// Mutator is a no-op if run_as is not specified in the bundle
|
||||
runAs := b.Config.RunAs
|
||||
if runAs == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if b.Config.Experimental != nil && b.Config.Experimental.UseLegacyRunAs {
|
||||
setPipelineOwnersToRunAsIdentity(b)
|
||||
setRunAsForJobs(b)
|
||||
return diag.Diagnostics{
|
||||
{
|
||||
Severity: diag.Warning,
|
||||
Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.",
|
||||
Path: dyn.MustPathFromString("experimental.use_legacy_run_as"),
|
||||
Location: b.Config.GetLocation("experimental.use_legacy_run_as"),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Assert the run_as configuration is valid in the context of the bundle
|
||||
if err := validateRunAs(b); err != nil {
|
||||
return diag.FromErr(err)
|
||||
}
|
||||
|
||||
setRunAsForJobs(b)
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
bundle:
|
||||
name: "run_as"
|
||||
|
||||
run_as:
|
||||
service_principal_name: "my_service_principal"
|
||||
|
||||
experimental:
|
||||
use_legacy_run_as: true
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
job_one:
|
||||
name: Job One
|
||||
|
||||
tasks:
|
||||
- task_key: "task_one"
|
||||
notebook_task:
|
||||
notebook_path: "./test.py"
|
||||
|
||||
job_two:
|
||||
name: Job Two
|
||||
|
||||
tasks:
|
||||
- task_key: "task_two"
|
||||
notebook_task:
|
||||
notebook_path: "./test.py"
|
||||
|
||||
job_three:
|
||||
name: Job Three
|
||||
|
||||
run_as:
|
||||
service_principal_name: "my_service_principal_for_job"
|
||||
|
||||
tasks:
|
||||
- task_key: "task_three"
|
||||
notebook_task:
|
||||
notebook_path: "./test.py"
|
||||
|
||||
pipelines:
|
||||
nyc_taxi_pipeline:
|
||||
name: "nyc taxi loader"
|
||||
|
||||
permissions:
|
||||
- level: CAN_VIEW
|
||||
service_principal_name: my_service_principal
|
||||
- level: CAN_VIEW
|
||||
user_name: my_user_name
|
||||
|
||||
libraries:
|
||||
- notebook:
|
||||
path: ./dlt/nyc_taxi_loader
|
||||
|
||||
|
||||
models:
|
||||
model_one:
|
||||
name: "skynet"
|
||||
|
||||
registered_models:
|
||||
model_two:
|
||||
name: "skynet (in UC)"
|
||||
|
||||
experiments:
|
||||
experiment_one:
|
||||
name: "experiment_one"
|
||||
|
||||
model_serving_endpoints:
|
||||
model_serving_one:
|
||||
name: "skynet"
|
|
@ -13,6 +13,7 @@ import (
|
|||
"github.com/databricks/databricks-sdk-go/service/catalog"
|
||||
"github.com/databricks/databricks-sdk-go/service/iam"
|
||||
"github.com/databricks/databricks-sdk-go/service/ml"
|
||||
"github.com/databricks/databricks-sdk-go/service/serving"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
|
@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) {
|
|||
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
|
||||
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
|
||||
}
|
||||
|
||||
func TestLegacyRunAs(t *testing.T) {
|
||||
b := load(t, "./run_as/legacy")
|
||||
|
||||
ctx := context.Background()
|
||||
bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||
b.Config.Workspace.CurrentUser = &config.User{
|
||||
User: &iam.User{
|
||||
UserName: "jane@doe.com",
|
||||
},
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
diags := bundle.Apply(ctx, b, mutator.SetRunAs())
|
||||
assert.NoError(t, diags.Error())
|
||||
|
||||
assert.Len(t, b.Config.Resources.Jobs, 3)
|
||||
jobs := b.Config.Resources.Jobs
|
||||
|
||||
// job_one and job_two should have the same run_as identity as the bundle.
|
||||
assert.NotNil(t, jobs["job_one"].RunAs)
|
||||
assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName)
|
||||
assert.Equal(t, "", jobs["job_one"].RunAs.UserName)
|
||||
|
||||
assert.NotNil(t, jobs["job_two"].RunAs)
|
||||
assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName)
|
||||
assert.Equal(t, "", jobs["job_two"].RunAs.UserName)
|
||||
|
||||
// job_three should retain it's run_as identity.
|
||||
assert.NotNil(t, jobs["job_three"].RunAs)
|
||||
assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
|
||||
assert.Equal(t, "", jobs["job_three"].RunAs.UserName)
|
||||
|
||||
// Assert owner permissions for pipelines are set.
|
||||
pipelines := b.Config.Resources.Pipelines
|
||||
assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2)
|
||||
|
||||
assert.Equal(t, "CAN_VIEW", pipelines["nyc_taxi_pipeline"].Permissions[0].Level)
|
||||
assert.Equal(t, "my_user_name", pipelines["nyc_taxi_pipeline"].Permissions[0].UserName)
|
||||
|
||||
assert.Equal(t, "IS_OWNER", pipelines["nyc_taxi_pipeline"].Permissions[1].Level)
|
||||
assert.Equal(t, "my_service_principal", pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName)
|
||||
|
||||
// Assert other resources are not affected.
|
||||
assert.Equal(t, ml.Model{Name: "skynet"}, *b.Config.Resources.Models["model_one"].Model)
|
||||
assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *b.Config.Resources.RegisteredModels["model_two"].CreateRegisteredModelRequest)
|
||||
assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *b.Config.Resources.Experiments["experiment_one"].Experiment)
|
||||
assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *b.Config.Resources.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue