Added run_as section for bundle configuration (#692)

## Changes
Added run_as section for bundle configuration.

This section lets you define a user name or service principal that
will be used as the execution identity for jobs and DLT pipelines. In
the case of DLT, the identity defined in `run_as` is assigned the
`IS_OWNER` permission on the pipeline.

## Tests
Added unit tests for configuration.

Also ran a deploy with the following bundle configuration:

```
bundle:
  name: "run_as"

run_as:
  # service_principal_name: "f7263fcc-56d0-4981-8baf-c2a45296690b"
  user_name: "lennart.kats@databricks.com"

resources:
  pipelines:
    andrew_pipeline:
      name: "Andrew Nester pipeline"
      libraries:
        - notebook:
            path: ./test.py

  jobs:
    job_one:
      name: Job One
      tasks:
        - task_key: "task"
          new_cluster:
            num_workers: 1
            spark_version: 13.2.x-snapshot-scala2.12
            node_type_id: i3.xlarge
            runtime_engine: PHOTON
          notebook_task: 
            notebook_path: "./test.py"
```
This commit is contained in:
Andrew Nester 2023-08-23 18:47:07 +02:00 committed by GitHub
parent 5ed635a240
commit 4ee926b885
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 202 additions and 0 deletions

View File

@ -0,0 +1,65 @@
package mutator
import (
"context"
"slices"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/jobs"
)
// setRunAs is the mutator handed out by SetRunAs. It has no fields; everything
// it needs is read from the bundle passed to Apply.
type setRunAs struct{}
// SetRunAs returns a mutator that walks the resources defined in the bundle
// (jobs and DLT pipelines) and assigns them the execution identity taken from
// the top-level "run_as" section of the configuration: "run_as" for a job,
// an "IS_OWNER" permission for a DLT pipeline. The mutator does nothing when
// no "run_as" section is defined.
func SetRunAs() bundle.Mutator {
	return new(setRunAs)
}
// Name reports the identifier of this mutator.
func (m *setRunAs) Name() string {
	const name = "SetRunAs"
	return name
}
// Apply propagates the bundle-level run_as identity to the bundle's resources.
//
// Every job that does not declare its own run_as receives a copy of the
// bundle-level identity; jobs with an explicit run_as are left untouched.
// Every pipeline gets an IS_OWNER permission for that identity, replacing any
// permission entry that already refers to the same user or service principal.
//
// The pipeline step is skipped when the identity in run_as is the user that is
// deploying the bundle. Apply is a no-op when no run_as section is configured
// and always returns nil.
func (m *setRunAs) Apply(_ context.Context, b *bundle.Bundle) error {
	runAs := b.Config.RunAs
	if runAs == nil {
		return nil
	}

	for _, job := range b.Config.Resources.Jobs {
		// An explicit run_as on the job wins over the bundle-level one.
		if job.RunAs != nil {
			continue
		}
		job.RunAs = &jobs.JobRunAs{
			ServicePrincipalName: runAs.ServicePrincipalName,
			UserName:             runAs.UserName,
		}
	}

	// The current user is a pointer wrapping another pointer; either may be
	// unset if this mutator runs before the current user has been populated.
	var me string
	if current := b.Config.Workspace.CurrentUser; current != nil && current.User != nil {
		me = current.UserName
	}

	// If user deploying the bundle and the one defined in run_as are the same
	// do not add IS_OWNER permission. Current user is implied to be an owner in this case.
	// Otherwise, it will fail due to this bug https://github.com/databricks/terraform-provider-databricks/issues/2407
	//
	// Only a non-empty name can identify the deployer: run_as normally sets just
	// one of its two fields, so an empty name would otherwise "match" the unset
	// one and silently skip the owner grant.
	if me != "" && (runAs.UserName == me || runAs.ServicePrincipalName == me) {
		return nil
	}

	// Reports whether a permission entry already targets the run_as identity.
	targetsRunAs := func(p resources.Permission) bool {
		return (runAs.ServicePrincipalName != "" && p.ServicePrincipalName == runAs.ServicePrincipalName) ||
			(runAs.UserName != "" && p.UserName == runAs.UserName)
	}

	for _, pipeline := range b.Config.Resources.Pipelines {
		// Drop existing entries for the identity first so that it ends up with
		// exactly one permission level on the pipeline.
		pipeline.Permissions = slices.DeleteFunc(pipeline.Permissions, targetsRunAs)
		pipeline.Permissions = append(pipeline.Permissions, resources.Permission{
			Level:                "IS_OWNER",
			ServicePrincipalName: runAs.ServicePrincipalName,
			UserName:             runAs.UserName,
		})
	}

	return nil
}

View File

@ -7,6 +7,7 @@ import (
"strings"
"github.com/databricks/cli/bundle/config/variable"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/ghodss/yaml"
"github.com/imdario/mergo"
)
@ -80,6 +81,9 @@ type Root struct {
// Sync section specifies options for files synchronization
Sync Sync `json:"sync"`
// RunAs section allows to define an execution identity for jobs and pipelines runs
RunAs *jobs.JobRunAs `json:"run_as,omitempty"`
}
func Load(path string) (*Root, error) {
@ -237,6 +241,10 @@ func (r *Root) MergeTargetOverrides(target *Target) error {
}
}
if target.RunAs != nil {
r.RunAs = target.RunAs
}
if target.Mode != "" {
r.Bundle.Mode = target.Mode
}

View File

@ -1,5 +1,7 @@
package config
import "github.com/databricks/databricks-sdk-go/service/jobs"
type Mode string
// Target defines overrides for a single target.
@ -31,6 +33,8 @@ type Target struct {
Variables map[string]string `json:"variables,omitempty"`
Git Git `json:"git,omitempty"`
RunAs *jobs.JobRunAs `json:"run_as,omitempty"`
}
const (

View File

@ -16,6 +16,7 @@ func Initialize() bundle.Mutator {
"initialize",
[]bundle.Mutator{
mutator.PopulateCurrentUser(),
mutator.SetRunAs(),
mutator.DefineDefaultWorkspaceRoot(),
mutator.ExpandWorkspaceRoot(),
mutator.DefineDefaultWorkspacePaths(),

View File

@ -0,0 +1,42 @@
# Bundle configuration used as a fixture by the run_as configuration tests
# (TestRunAsDefault and TestRunAsDevelopment).
# NOTE(review): indentation is not preserved in this view; the nesting described
# in the comments below is inferred from those tests - confirm against the file.
bundle:
name: "run_as"
# Bundle-level execution identity: a service principal.
run_as:
service_principal_name: "my_service_principal"
targets:
development:
mode: development
# The "development" target replaces the bundle-level identity with a user.
run_as:
user_name: "my_user_name"
resources:
pipelines:
nyc_taxi_pipeline:
# Both identities start out with CAN_VIEW. The tests expect the entry that
# matches the effective run_as identity to be replaced by an IS_OWNER entry
# appended at the end, while the other entry stays first.
permissions:
- level: CAN_VIEW
service_principal_name: my_service_principal
- level: CAN_VIEW
user_name: my_user_name
name: "nyc taxi loader"
libraries:
- notebook:
path: ./dlt/nyc_taxi_loader
jobs:
# job_one and job_two declare no run_as of their own.
job_one:
name: Job One
tasks:
- task:
notebook_path: "./test.py"
job_two:
name: Job Two
tasks:
- task:
notebook_path: "./test.py"
# job_three declares its own run_as, which the tests expect to be kept as is.
job_three:
name: Job Three
run_as:
service_principal_name: "my_service_principal_for_job"
tasks:
- task:
notebook_path: "./test.py"

View File

@ -0,0 +1,82 @@
package config_tests
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/stretchr/testify/assert"
)
// TestRunAsDefault loads the "./run_as" bundle without selecting a target and
// applies the SetRunAs mutator as a deployer ("jane@doe.com") that differs from
// the run_as identity.
//
// It expects jobs without their own run_as to receive the bundle-level service
// principal, job_three to keep the service principal it declares itself, and
// the pipeline to end up with the user's CAN_VIEW entry followed by an IS_OWNER
// entry for the service principal.
func TestRunAsDefault(t *testing.T) {
	b := load(t, "./run_as")
	b.Config.Workspace.CurrentUser = &config.User{
		User: &iam.User{
			UserName: "jane@doe.com",
		},
	}

	ctx := context.Background()
	err := bundle.Apply(ctx, b, mutator.SetRunAs())
	assert.NoError(t, err)

	jobs := b.Config.Resources.Jobs
	assert.Len(t, jobs, 3)

	// Guard each dereference so that a missing job or run_as is reported as an
	// assertion failure instead of a nil pointer panic.
	for _, key := range []string{"job_one", "job_two"} {
		job := jobs[key]
		if assert.NotNil(t, job, key) && assert.NotNil(t, job.RunAs, key) {
			assert.Equal(t, "my_service_principal", job.RunAs.ServicePrincipalName, key)
			assert.Equal(t, "", job.RunAs.UserName, key)
		}
	}

	if job := jobs["job_three"]; assert.NotNil(t, job) && assert.NotNil(t, job.RunAs) {
		assert.Equal(t, "my_service_principal_for_job", job.RunAs.ServicePrincipalName)
		assert.Equal(t, "", job.RunAs.UserName)
	}

	pipeline := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
	if !assert.NotNil(t, pipeline) || !assert.Len(t, pipeline.Permissions, 2) {
		return
	}

	// assert.Equal takes the expected value first, then the actual one.
	permissions := pipeline.Permissions
	assert.Equal(t, "CAN_VIEW", permissions[0].Level)
	assert.Equal(t, "my_user_name", permissions[0].UserName)
	assert.Equal(t, "IS_OWNER", permissions[1].Level)
	assert.Equal(t, "my_service_principal", permissions[1].ServicePrincipalName)
}
// TestRunAsDevelopment loads the "./run_as" bundle with the "development"
// target selected and applies the SetRunAs mutator as a deployer
// ("jane@doe.com") that differs from the run_as identity.
//
// It expects jobs without their own run_as to receive the user name configured
// for the target, job_three to keep the service principal it declares itself,
// and the pipeline to end up with the service principal's CAN_VIEW entry
// followed by an IS_OWNER entry for the user.
func TestRunAsDevelopment(t *testing.T) {
	b := loadTarget(t, "./run_as", "development")
	b.Config.Workspace.CurrentUser = &config.User{
		User: &iam.User{
			UserName: "jane@doe.com",
		},
	}

	ctx := context.Background()
	err := bundle.Apply(ctx, b, mutator.SetRunAs())
	assert.NoError(t, err)

	jobs := b.Config.Resources.Jobs
	assert.Len(t, jobs, 3)

	// Guard each dereference so that a missing job or run_as is reported as an
	// assertion failure instead of a nil pointer panic.
	for _, key := range []string{"job_one", "job_two"} {
		job := jobs[key]
		if assert.NotNil(t, job, key) && assert.NotNil(t, job.RunAs, key) {
			assert.Equal(t, "", job.RunAs.ServicePrincipalName, key)
			assert.Equal(t, "my_user_name", job.RunAs.UserName, key)
		}
	}

	if job := jobs["job_three"]; assert.NotNil(t, job) && assert.NotNil(t, job.RunAs) {
		assert.Equal(t, "my_service_principal_for_job", job.RunAs.ServicePrincipalName)
		assert.Equal(t, "", job.RunAs.UserName)
	}

	pipeline := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
	if !assert.NotNil(t, pipeline) || !assert.Len(t, pipeline.Permissions, 2) {
		return
	}

	// assert.Equal takes the expected value first, then the actual one.
	permissions := pipeline.Permissions
	assert.Equal(t, "CAN_VIEW", permissions[0].Level)
	assert.Equal(t, "my_service_principal", permissions[0].ServicePrincipalName)
	assert.Equal(t, "IS_OWNER", permissions[1].Level)
	assert.Equal(t, "my_user_name", permissions[1].UserName)
}