Add legacy option for `run_as` (#1384)

## Changes
This PR partially reverts the changes in
https://github.com/databricks/cli/pull/1233 and puts the old code under
an "experimental.use_legacy_run_as" configuration. This gives customers
who ran into the breaking change made in the PR a way out.


## Tests
Both manually and via unit tests.

Manually verified that run_as works for pipelines now. And if a user
wants to use the feature they need to be both a Metastore and a
workspace admin.

---------

Error when the deploying user is a workspace admin but not a metastore
admin:
```
Error: terraform apply: exit status 1

Error: cannot update permissions: User is not a metastore admin for Metastore 'deco-uc-prod-aws-us-east-1'.

  with databricks_permissions.pipeline_foo,
  on bundle.tf.json line 23, in resource.databricks_permissions.pipeline_foo:
  23:       }
```

--------

Output of bundle validate:
```
➜  bundle-playground git:(master) ✗ cli bundle validate
Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.
  at experimental.use_legacy_run_as
  in databricks.yml:13:22

Name: bundle-playground
Target: default
Workspace:
  Host: https://dbc-a39a1eb1-ef95.cloud.databricks.com
  User: shreyas.goenka@databricks.com
  Path: /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default

Found 1 warning
```
This commit is contained in:
shreyas-goenka 2024-04-22 17:21:41 +05:30 committed by GitHub
parent 3108883a8f
commit 1d9bf4b2c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 193 additions and 9 deletions

View File

@ -10,6 +10,19 @@ type Experimental struct {
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it. // In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635 // For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"` PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
// Enable legacy run_as behavior. That is:
// - Set the run_as identity as the owner of any pipelines in the bundle.
// - Do not error in the presence of resources that do not support run_as.
// As of April 2024 this includes pipelines and model serving endpoints.
//
// This mode of run_as requires the deploying user to be a workspace and metastore
// admin. Use of this flag is not recommend for new bundles, and it is only provided
// to unblock customers that are stuck due to breaking changes in the run_as behavior
// made in https://github.com/databricks/cli/pull/1233. This flag might
// be removed in the future once we have a proper workaround like allowing IS_OWNER
// as a top-level permission in the DAB.
UseLegacyRunAs bool `json:"use_legacy_run_as,omitempty"`
} }
type Command string type Command string

View File

@ -3,8 +3,10 @@ package mutator
import ( import (
"context" "context"
"fmt" "fmt"
"slices"
"github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/jobs"
@ -101,19 +103,12 @@ func validateRunAs(b *bundle.Bundle) error {
return nil return nil
} }
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { func setRunAsForJobs(b *bundle.Bundle) {
// Mutator is a no-op if run_as is not specified in the bundle
runAs := b.Config.RunAs runAs := b.Config.RunAs
if runAs == nil { if runAs == nil {
return nil return
} }
// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}
// Set run_as for jobs
for i := range b.Config.Resources.Jobs { for i := range b.Config.Resources.Jobs {
job := b.Config.Resources.Jobs[i] job := b.Config.Resources.Jobs[i]
if job.RunAs != nil { if job.RunAs != nil {
@ -124,6 +119,63 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
UserName: runAs.UserName, UserName: runAs.UserName,
} }
} }
}
// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_run_as_legacy flag.
// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233
func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) {
runAs := b.Config.RunAs
if runAs == nil {
return
}
me := b.Config.Workspace.CurrentUser.UserName
// If user deploying the bundle and the one defined in run_as are the same
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
if runAs.UserName == me || runAs.ServicePrincipalName == me {
return
}
for i := range b.Config.Resources.Pipelines {
pipeline := b.Config.Resources.Pipelines[i]
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
(runAs.UserName != "" && p.UserName == runAs.UserName)
})
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
Level: "IS_OWNER",
ServicePrincipalName: runAs.ServicePrincipalName,
UserName: runAs.UserName,
})
}
}
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
// Mutator is a no-op if run_as is not specified in the bundle
runAs := b.Config.RunAs
if runAs == nil {
return nil
}
if b.Config.Experimental != nil && b.Config.Experimental.UseLegacyRunAs {
setPipelineOwnersToRunAsIdentity(b)
setRunAsForJobs(b)
return diag.Diagnostics{
{
Severity: diag.Warning,
Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.",
Path: dyn.MustPathFromString("experimental.use_legacy_run_as"),
Location: b.Config.GetLocation("experimental.use_legacy_run_as"),
},
}
}
// Assert the run_as configuration is valid in the context of the bundle
if err := validateRunAs(b); err != nil {
return diag.FromErr(err)
}
setRunAsForJobs(b)
return nil return nil
} }

View File

@ -0,0 +1,68 @@
bundle:
name: "run_as"
run_as:
service_principal_name: "my_service_principal"
experimental:
use_legacy_run_as: true
resources:
jobs:
job_one:
name: Job One
tasks:
- task_key: "task_one"
notebook_task:
notebook_path: "./test.py"
job_two:
name: Job Two
tasks:
- task_key: "task_two"
notebook_task:
notebook_path: "./test.py"
job_three:
name: Job Three
run_as:
service_principal_name: "my_service_principal_for_job"
tasks:
- task_key: "task_three"
notebook_task:
notebook_path: "./test.py"
pipelines:
nyc_taxi_pipeline:
name: "nyc taxi loader"
permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name
libraries:
- notebook:
path: ./dlt/nyc_taxi_loader
models:
model_one:
name: "skynet"
registered_models:
model_two:
name: "skynet (in UC)"
experiments:
experiment_one:
name: "experiment_one"
model_serving_endpoints:
model_serving_one:
name: "skynet"

View File

@ -13,6 +13,7 @@ import (
"github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/ml" "github.com/databricks/databricks-sdk-go/service/ml"
"github.com/databricks/databricks-sdk-go/service/serving"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) {
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml") configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath)) assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
} }
func TestLegacyRunAs(t *testing.T) {
b := load(t, "./run_as/legacy")
ctx := context.Background()
bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
b.Config.Workspace.CurrentUser = &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
}
return nil
})
diags := bundle.Apply(ctx, b, mutator.SetRunAs())
assert.NoError(t, diags.Error())
assert.Len(t, b.Config.Resources.Jobs, 3)
jobs := b.Config.Resources.Jobs
// job_one and job_two should have the same run_as identity as the bundle.
assert.NotNil(t, jobs["job_one"].RunAs)
assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_one"].RunAs.UserName)
assert.NotNil(t, jobs["job_two"].RunAs)
assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_two"].RunAs.UserName)
// job_three should retain it's run_as identity.
assert.NotNil(t, jobs["job_three"].RunAs)
assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
assert.Equal(t, "", jobs["job_three"].RunAs.UserName)
// Assert owner permissions for pipelines are set.
pipelines := b.Config.Resources.Pipelines
assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2)
assert.Equal(t, "CAN_VIEW", pipelines["nyc_taxi_pipeline"].Permissions[0].Level)
assert.Equal(t, "my_user_name", pipelines["nyc_taxi_pipeline"].Permissions[0].UserName)
assert.Equal(t, "IS_OWNER", pipelines["nyc_taxi_pipeline"].Permissions[1].Level)
assert.Equal(t, "my_service_principal", pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName)
// Assert other resources are not affected.
assert.Equal(t, ml.Model{Name: "skynet"}, *b.Config.Resources.Models["model_one"].Model)
assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *b.Config.Resources.RegisteredModels["model_two"].CreateRegisteredModelRequest)
assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *b.Config.Resources.Experiments["experiment_one"].Experiment)
assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *b.Config.Resources.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint)
}