mirror of https://github.com/databricks/cli.git
Add legacy option for `run_as` (#1384)
## Changes This PR partially reverts the changes in https://github.com/databricks/cli/pull/1233 and puts the old code under an "experimental.use_legacy_run_as" configuration. This gives customers who ran into the breaking change made in the PR a way out. ## Tests Both manually and via unit tests. Manually verified that run_as works for pipelines now. And if a user wants to use the feature they need to be both a Metastore and a workspace admin. --------- Error when the deploying user is a workspace admin but not a metastore admin: ``` Error: terraform apply: exit status 1 Error: cannot update permissions: User is not a metastore admin for Metastore 'deco-uc-prod-aws-us-east-1'. with databricks_permissions.pipeline_foo, on bundle.tf.json line 23, in resource.databricks_permissions.pipeline_foo: 23: } ``` -------- Output of bundle validate: ``` ➜ bundle-playground git:(master) ✗ cli bundle validate Warning: You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC. at experimental.use_legacy_run_as in databricks.yml:13:22 Name: bundle-playground Target: default Workspace: Host: https://dbc-a39a1eb1-ef95.cloud.databricks.com User: shreyas.goenka@databricks.com Path: /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default Found 1 warning ```
This commit is contained in:
parent
3108883a8f
commit
1d9bf4b2c4
|
@ -10,6 +10,19 @@ type Experimental struct {
|
||||||
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
|
// In this case the configured wheel task will be deployed as a notebook task which install defined wheel in runtime and executes it.
|
||||||
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
|
// For more details see https://github.com/databricks/cli/pull/797 and https://github.com/databricks/cli/pull/635
|
||||||
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
|
PythonWheelWrapper bool `json:"python_wheel_wrapper,omitempty"`
|
||||||
|
|
||||||
|
// Enable legacy run_as behavior. That is:
|
||||||
|
// - Set the run_as identity as the owner of any pipelines in the bundle.
|
||||||
|
// - Do not error in the presence of resources that do not support run_as.
|
||||||
|
// As of April 2024 this includes pipelines and model serving endpoints.
|
||||||
|
//
|
||||||
|
// This mode of run_as requires the deploying user to be a workspace and metastore
|
||||||
|
// admin. Use of this flag is not recommended for new bundles, and it is only provided
|
||||||
|
// to unblock customers that are stuck due to breaking changes in the run_as behavior
|
||||||
|
// made in https://github.com/databricks/cli/pull/1233. This flag might
|
||||||
|
// be removed in the future once we have a proper workaround like allowing IS_OWNER
|
||||||
|
// as a top-level permission in the DAB.
|
||||||
|
UseLegacyRunAs bool `json:"use_legacy_run_as,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Command string
|
type Command string
|
||||||
|
|
|
@ -3,8 +3,10 @@ package mutator
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"slices"
|
||||||
|
|
||||||
"github.com/databricks/cli/bundle"
|
"github.com/databricks/cli/bundle"
|
||||||
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
"github.com/databricks/cli/libs/diag"
|
"github.com/databricks/cli/libs/diag"
|
||||||
"github.com/databricks/cli/libs/dyn"
|
"github.com/databricks/cli/libs/dyn"
|
||||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
@ -101,19 +103,12 @@ func validateRunAs(b *bundle.Bundle) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
func setRunAsForJobs(b *bundle.Bundle) {
|
||||||
// Mutator is a no-op if run_as is not specified in the bundle
|
|
||||||
runAs := b.Config.RunAs
|
runAs := b.Config.RunAs
|
||||||
if runAs == nil {
|
if runAs == nil {
|
||||||
return nil
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert the run_as configuration is valid in the context of the bundle
|
|
||||||
if err := validateRunAs(b); err != nil {
|
|
||||||
return diag.FromErr(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set run_as for jobs
|
|
||||||
for i := range b.Config.Resources.Jobs {
|
for i := range b.Config.Resources.Jobs {
|
||||||
job := b.Config.Resources.Jobs[i]
|
job := b.Config.Resources.Jobs[i]
|
||||||
if job.RunAs != nil {
|
if job.RunAs != nil {
|
||||||
|
@ -124,6 +119,63 @@ func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||||
UserName: runAs.UserName,
|
UserName: runAs.UserName,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Legacy behavior of run_as for DLT pipelines. Available under the experimental.use_run_as_legacy flag.
|
||||||
|
// Only available to unblock customers stuck due to breaking changes in https://github.com/databricks/cli/pull/1233
|
||||||
|
func setPipelineOwnersToRunAsIdentity(b *bundle.Bundle) {
|
||||||
|
runAs := b.Config.RunAs
|
||||||
|
if runAs == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
me := b.Config.Workspace.CurrentUser.UserName
|
||||||
|
// If user deploying the bundle and the one defined in run_as are the same
|
||||||
|
// Do not add IS_OWNER permission. Current user is implied to be an owner in this case.
|
||||||
|
// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
|
||||||
|
if runAs.UserName == me || runAs.ServicePrincipalName == me {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range b.Config.Resources.Pipelines {
|
||||||
|
pipeline := b.Config.Resources.Pipelines[i]
|
||||||
|
pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool {
|
||||||
|
return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
|
||||||
|
(runAs.UserName != "" && p.UserName == runAs.UserName)
|
||||||
|
})
|
||||||
|
pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
|
||||||
|
Level: "IS_OWNER",
|
||||||
|
ServicePrincipalName: runAs.ServicePrincipalName,
|
||||||
|
UserName: runAs.UserName,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||||
|
// Mutator is a no-op if run_as is not specified in the bundle
|
||||||
|
runAs := b.Config.RunAs
|
||||||
|
if runAs == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if b.Config.Experimental != nil && b.Config.Experimental.UseLegacyRunAs {
|
||||||
|
setPipelineOwnersToRunAsIdentity(b)
|
||||||
|
setRunAsForJobs(b)
|
||||||
|
return diag.Diagnostics{
|
||||||
|
{
|
||||||
|
Severity: diag.Warning,
|
||||||
|
Summary: "You are using the legacy mode of run_as. The support for this mode is experimental and might be removed in a future release of the CLI. In order to run the DLT pipelines in your DAB as the run_as user this mode changes the owners of the pipelines to the run_as identity, which requires the user deploying the bundle to be a workspace admin, and also a Metastore admin if the pipeline target is in UC.",
|
||||||
|
Path: dyn.MustPathFromString("experimental.use_legacy_run_as"),
|
||||||
|
Location: b.Config.GetLocation("experimental.use_legacy_run_as"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert the run_as configuration is valid in the context of the bundle
|
||||||
|
if err := validateRunAs(b); err != nil {
|
||||||
|
return diag.FromErr(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
setRunAsForJobs(b)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
bundle:
|
||||||
|
name: "run_as"
|
||||||
|
|
||||||
|
run_as:
|
||||||
|
service_principal_name: "my_service_principal"
|
||||||
|
|
||||||
|
experimental:
|
||||||
|
use_legacy_run_as: true
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
job_one:
|
||||||
|
name: Job One
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- task_key: "task_one"
|
||||||
|
notebook_task:
|
||||||
|
notebook_path: "./test.py"
|
||||||
|
|
||||||
|
job_two:
|
||||||
|
name: Job Two
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- task_key: "task_two"
|
||||||
|
notebook_task:
|
||||||
|
notebook_path: "./test.py"
|
||||||
|
|
||||||
|
job_three:
|
||||||
|
name: Job Three
|
||||||
|
|
||||||
|
run_as:
|
||||||
|
service_principal_name: "my_service_principal_for_job"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- task_key: "task_three"
|
||||||
|
notebook_task:
|
||||||
|
notebook_path: "./test.py"
|
||||||
|
|
||||||
|
pipelines:
|
||||||
|
nyc_taxi_pipeline:
|
||||||
|
name: "nyc taxi loader"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
- level: CAN_VIEW
|
||||||
|
service_principal_name: my_service_principal
|
||||||
|
- level: CAN_VIEW
|
||||||
|
user_name: my_user_name
|
||||||
|
|
||||||
|
libraries:
|
||||||
|
- notebook:
|
||||||
|
path: ./dlt/nyc_taxi_loader
|
||||||
|
|
||||||
|
|
||||||
|
models:
|
||||||
|
model_one:
|
||||||
|
name: "skynet"
|
||||||
|
|
||||||
|
registered_models:
|
||||||
|
model_two:
|
||||||
|
name: "skynet (in UC)"
|
||||||
|
|
||||||
|
experiments:
|
||||||
|
experiment_one:
|
||||||
|
name: "experiment_one"
|
||||||
|
|
||||||
|
model_serving_endpoints:
|
||||||
|
model_serving_one:
|
||||||
|
name: "skynet"
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"github.com/databricks/databricks-sdk-go/service/catalog"
|
"github.com/databricks/databricks-sdk-go/service/catalog"
|
||||||
"github.com/databricks/databricks-sdk-go/service/iam"
|
"github.com/databricks/databricks-sdk-go/service/iam"
|
||||||
"github.com/databricks/databricks-sdk-go/service/ml"
|
"github.com/databricks/databricks-sdk-go/service/ml"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/serving"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -233,3 +234,53 @@ func TestRunAsErrorNeitherUserOrSpSpecifiedAtTargetOverride(t *testing.T) {
|
||||||
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
|
configPath := filepath.FromSlash("run_as/not_allowed/neither_sp_nor_user_override/override.yml")
|
||||||
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
|
assert.EqualError(t, err, fmt.Sprintf("run_as section must specify exactly one identity. Neither service_principal_name nor user_name is specified at %s:4:12", configPath))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLegacyRunAs(t *testing.T) {
|
||||||
|
b := load(t, "./run_as/legacy")
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||||
|
b.Config.Workspace.CurrentUser = &config.User{
|
||||||
|
User: &iam.User{
|
||||||
|
UserName: "jane@doe.com",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
diags := bundle.Apply(ctx, b, mutator.SetRunAs())
|
||||||
|
assert.NoError(t, diags.Error())
|
||||||
|
|
||||||
|
assert.Len(t, b.Config.Resources.Jobs, 3)
|
||||||
|
jobs := b.Config.Resources.Jobs
|
||||||
|
|
||||||
|
// job_one and job_two should have the same run_as identity as the bundle.
|
||||||
|
assert.NotNil(t, jobs["job_one"].RunAs)
|
||||||
|
assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName)
|
||||||
|
assert.Equal(t, "", jobs["job_one"].RunAs.UserName)
|
||||||
|
|
||||||
|
assert.NotNil(t, jobs["job_two"].RunAs)
|
||||||
|
assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName)
|
||||||
|
assert.Equal(t, "", jobs["job_two"].RunAs.UserName)
|
||||||
|
|
||||||
|
// job_three should retain it's run_as identity.
|
||||||
|
assert.NotNil(t, jobs["job_three"].RunAs)
|
||||||
|
assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName)
|
||||||
|
assert.Equal(t, "", jobs["job_three"].RunAs.UserName)
|
||||||
|
|
||||||
|
// Assert owner permissions for pipelines are set.
|
||||||
|
pipelines := b.Config.Resources.Pipelines
|
||||||
|
assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2)
|
||||||
|
|
||||||
|
assert.Equal(t, "CAN_VIEW", pipelines["nyc_taxi_pipeline"].Permissions[0].Level)
|
||||||
|
assert.Equal(t, "my_user_name", pipelines["nyc_taxi_pipeline"].Permissions[0].UserName)
|
||||||
|
|
||||||
|
assert.Equal(t, "IS_OWNER", pipelines["nyc_taxi_pipeline"].Permissions[1].Level)
|
||||||
|
assert.Equal(t, "my_service_principal", pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName)
|
||||||
|
|
||||||
|
// Assert other resources are not affected.
|
||||||
|
assert.Equal(t, ml.Model{Name: "skynet"}, *b.Config.Resources.Models["model_one"].Model)
|
||||||
|
assert.Equal(t, catalog.CreateRegisteredModelRequest{Name: "skynet (in UC)"}, *b.Config.Resources.RegisteredModels["model_two"].CreateRegisteredModelRequest)
|
||||||
|
assert.Equal(t, ml.Experiment{Name: "experiment_one"}, *b.Config.Resources.Experiments["experiment_one"].Experiment)
|
||||||
|
assert.Equal(t, serving.CreateServingEndpoint{Name: "skynet"}, *b.Config.Resources.ModelServingEndpoints["model_serving_one"].CreateServingEndpoint)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue