From 4ee926b8858bf1583fcd8bbe9a5222b1594e72ec Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 23 Aug 2023 18:47:07 +0200 Subject: [PATCH] Added run_as section for bundle configuration (#692) ## Changes Added run_as section for bundle configuration. This section allows to define an user name or service principal which will be applied as an execution identity for jobs and DLT pipelines. In the case of DLT, identity defined in `run_as` will be assigned `IS_OWNER` permission on this pipeline. ## Tests Added unit tests for configuration. Also ran deploy for the following bundle configuration ``` bundle: name: "run_as" run_as: # service_principal_name: "f7263fcc-56d0-4981-8baf-c2a45296690b" user_name: "lennart.kats@databricks.com" resources: pipelines: andrew_pipeline: name: "Andrew Nester pipeline" libraries: - notebook: path: ./test.py jobs: job_one: name: Job One tasks: - task_key: "task" new_cluster: num_workers: 1 spark_version: 13.2.x-snapshot-scala2.12 node_type_id: i3.xlarge runtime_engine: PHOTON notebook_task: notebook_path: "./test.py" ``` --- bundle/config/mutator/run_as.go | 65 +++++++++++++++++++++++ bundle/config/root.go | 8 +++ bundle/config/target.go | 4 ++ bundle/phases/initialize.go | 1 + bundle/tests/run_as/databricks.yml | 42 +++++++++++++++ bundle/tests/run_as_test.go | 82 ++++++++++++++++++++++++++++++ 6 files changed, 202 insertions(+) create mode 100644 bundle/config/mutator/run_as.go create mode 100644 bundle/tests/run_as/databricks.yml create mode 100644 bundle/tests/run_as_test.go diff --git a/bundle/config/mutator/run_as.go b/bundle/config/mutator/run_as.go new file mode 100644 index 00000000..7d1a4917 --- /dev/null +++ b/bundle/config/mutator/run_as.go @@ -0,0 +1,65 @@ +package mutator + +import ( + "context" + "slices" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +type setRunAs struct { +} + +// SetRunAs mutator is used to go over defined resources such as Jobs and DLT Pipelines +// And set correct execution identity ("run_as" for a job or "is_owner" permission for DLT) +// if top-level "run-as" section is defined in the configuration. +func SetRunAs() bundle.Mutator { + return &setRunAs{} +} + +func (m *setRunAs) Name() string { + return "SetRunAs" +} + +func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error { + runAs := b.Config.RunAs + if runAs == nil { + return nil + } + + for i := range b.Config.Resources.Jobs { + job := b.Config.Resources.Jobs[i] + if job.RunAs != nil { + continue + } + job.RunAs = &jobs.JobRunAs{ + ServicePrincipalName: runAs.ServicePrincipalName, + UserName: runAs.UserName, + } + } + + me := b.Config.Workspace.CurrentUser.UserName + // If user deploying the bundle and the one defined in run_as are the same + // Do not add IS_OWNER permission. Current user is implied to be an owner in this case. + // Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407 + if runAs.UserName == me || runAs.ServicePrincipalName == me { + return nil + } + + for i := range b.Config.Resources.Pipelines { + pipeline := b.Config.Resources.Pipelines[i] + pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, func(p resources.Permission) bool { + return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) || + (runAs.UserName != "" && p.UserName == runAs.UserName) + }) + pipeline.Permissions = append(pipeline.Permissions, resources.Permission{ + Level: "IS_OWNER", + ServicePrincipalName: runAs.ServicePrincipalName, + UserName: runAs.UserName, + }) + } + + return nil +} diff --git a/bundle/config/root.go b/bundle/config/root.go index e0d20425..1275dab4 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/ghodss/yaml" "github.com/imdario/mergo" ) @@ -80,6 +81,9 @@ type Root struct { // Sync section specifies options for files synchronization Sync Sync `json:"sync"` + + // RunAs section allows to define an execution identity for jobs and pipelines runs + RunAs *jobs.JobRunAs `json:"run_as,omitempty"` } func Load(path string) (*Root, error) { @@ -237,6 +241,10 @@ func (r *Root) MergeTargetOverrides(target *Target) error { } } + if target.RunAs != nil { + r.RunAs = target.RunAs + } + if target.Mode != "" { r.Bundle.Mode = target.Mode } diff --git a/bundle/config/target.go b/bundle/config/target.go index 10775049..6a45fdb8 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -1,5 +1,7 @@ package config +import "github.com/databricks/databricks-sdk-go/service/jobs" + type Mode string // Target defines overrides for a single target. @@ -31,6 +33,8 @@ type Target struct { Variables map[string]string `json:"variables,omitempty"` Git Git `json:"git,omitempty"` + + RunAs *jobs.JobRunAs `json:"run_as,omitempty"` } const ( diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 219ec26c..546a8478 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -16,6 +16,7 @@ func Initialize() bundle.Mutator { "initialize", []bundle.Mutator{ mutator.PopulateCurrentUser(), + mutator.SetRunAs(), mutator.DefineDefaultWorkspaceRoot(), mutator.ExpandWorkspaceRoot(), mutator.DefineDefaultWorkspacePaths(), diff --git a/bundle/tests/run_as/databricks.yml b/bundle/tests/run_as/databricks.yml new file mode 100644 index 00000000..18ea5573 --- /dev/null +++ b/bundle/tests/run_as/databricks.yml @@ -0,0 +1,42 @@ +bundle: + name: "run_as" + +run_as: + service_principal_name: "my_service_principal" + +targets: + development: + mode: development + run_as: + user_name: "my_user_name" + +resources: + pipelines: + nyc_taxi_pipeline: + permissions: + - level: CAN_VIEW + service_principal_name: my_service_principal + - level: CAN_VIEW + user_name: my_user_name + name: "nyc taxi loader" + libraries: + - notebook: + path: ./dlt/nyc_taxi_loader + jobs: + job_one: + name: Job One + tasks: + - task: + notebook_path: "./test.py" + job_two: + name: Job Two + tasks: + - task: + notebook_path: "./test.py" + job_three: + name: Job Three + run_as: + service_principal_name: "my_service_principal_for_job" + tasks: + - task: + notebook_path: "./test.py" diff --git a/bundle/tests/run_as_test.go b/bundle/tests/run_as_test.go new file mode 100644 index 00000000..44c06816 --- /dev/null +++ b/bundle/tests/run_as_test.go @@ -0,0 +1,82 @@ +package config_tests + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" +) + +func TestRunAsDefault(t *testing.T) { + b := load(t, "./run_as") + b.Config.Workspace.CurrentUser = &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, + } + ctx := context.Background() + err := bundle.Apply(ctx, b, mutator.SetRunAs()) + assert.NoError(t, err) + + assert.Len(t, b.Config.Resources.Jobs, 3) + jobs := b.Config.Resources.Jobs + + assert.NotNil(t, jobs["job_one"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_one"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_one"].RunAs.UserName) + + assert.NotNil(t, jobs["job_two"].RunAs) + assert.Equal(t, "my_service_principal", jobs["job_two"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_two"].RunAs.UserName) + + assert.NotNil(t, jobs["job_three"].RunAs) + assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_three"].RunAs.UserName) + + pipelines := b.Config.Resources.Pipelines + assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "CAN_VIEW") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].UserName, "my_user_name") + + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].ServicePrincipalName, "my_service_principal") +} + +func TestRunAsDevelopment(t *testing.T) { + b := loadTarget(t, "./run_as", "development") + b.Config.Workspace.CurrentUser = &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, + } + ctx := context.Background() + err := bundle.Apply(ctx, b, mutator.SetRunAs()) + assert.NoError(t, err) + + assert.Len(t, b.Config.Resources.Jobs, 3) + jobs := b.Config.Resources.Jobs + + assert.NotNil(t, jobs["job_one"].RunAs) + assert.Equal(t, "", jobs["job_one"].RunAs.ServicePrincipalName) + assert.Equal(t, "my_user_name", jobs["job_one"].RunAs.UserName) + + assert.NotNil(t, jobs["job_two"].RunAs) + assert.Equal(t, "", jobs["job_two"].RunAs.ServicePrincipalName) + assert.Equal(t, "my_user_name", jobs["job_two"].RunAs.UserName) + + assert.NotNil(t, jobs["job_three"].RunAs) + assert.Equal(t, "my_service_principal_for_job", jobs["job_three"].RunAs.ServicePrincipalName) + assert.Equal(t, "", jobs["job_three"].RunAs.UserName) + + pipelines := b.Config.Resources.Pipelines + assert.Len(t, pipelines["nyc_taxi_pipeline"].Permissions, 2) + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].Level, "CAN_VIEW") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[0].ServicePrincipalName, "my_service_principal") + + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].Level, "IS_OWNER") + assert.Equal(t, pipelines["nyc_taxi_pipeline"].Permissions[1].UserName, "my_user_name") +}