From 368796ba122b80a851091c58c66eb67bb9741ff0 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Mon, 10 Jul 2023 09:12:50 +0200 Subject: [PATCH] WIP --- bundle/config/environment.go | 10 +- bundle/config/git.go | 3 + .../mutator/expand_workspace_root_test.go | 18 ++- bundle/config/mutator/load_git_details.go | 1 + .../config/mutator/populate_current_user.go | 7 +- .../mutator/process_environment_mode.go | 123 ++++++++++++-- .../mutator/process_environment_mode_test.go | 151 ++++++++++++++---- bundle/config/workspace.go | 9 +- bundle/deploy/files/sync.go | 7 +- 9 files changed, 277 insertions(+), 52 deletions(-) diff --git a/bundle/config/environment.go b/bundle/config/environment.go index 06a8d8909..3e66977e5 100644 --- a/bundle/config/environment.go +++ b/bundle/config/environment.go @@ -32,7 +32,13 @@ type Environment struct { } const ( - // Right now, we just have a default / "" mode and a "development" mode. - // Additional modes are expected to come for pull-requests and production. + // Development mode: deployments done purely for running things in development. + // Any deployed resources will be marked as "dev" and might be hidden or cleaned up. Development Mode = "development" + + // Production mode: deployments done for production purposes. + // Any deployed resources will not be changed but this mode will enable + // various strictness checks to make sure that a deployment is correctly set up + // for production purposes. 
+ Production Mode = "production" ) diff --git a/bundle/config/git.go b/bundle/config/git.go index 7ada8dfbc..e9f5fc0cc 100644 --- a/bundle/config/git.go +++ b/bundle/config/git.go @@ -4,4 +4,7 @@ type Git struct { Branch string `json:"branch,omitempty"` OriginURL string `json:"origin_url,omitempty"` Commit string `json:"commit,omitempty" bundle:"readonly"` + + // Inferred is set to true if the Git details were inferred and weren't set explicitly + Inferred bool `json:"-" bundle:"readonly"` } diff --git a/bundle/config/mutator/expand_workspace_root_test.go b/bundle/config/mutator/expand_workspace_root_test.go index e872dc835..0ec11a07d 100644 --- a/bundle/config/mutator/expand_workspace_root_test.go +++ b/bundle/config/mutator/expand_workspace_root_test.go @@ -16,8 +16,10 @@ func TestExpandWorkspaceRoot(t *testing.T) { bundle := &bundle.Bundle{ Config: config.Root{ Workspace: config.Workspace{ - CurrentUser: &iam.User{ - UserName: "jane@doe.com", + CurrentUser: &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, }, RootPath: "~/foo", }, @@ -32,8 +34,10 @@ func TestExpandWorkspaceRootDoesNothing(t *testing.T) { bundle := &bundle.Bundle{ Config: config.Root{ Workspace: config.Workspace{ - CurrentUser: &iam.User{ - UserName: "jane@doe.com", + CurrentUser: &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, }, RootPath: "/Users/charly@doe.com/foo", }, @@ -48,8 +52,10 @@ func TestExpandWorkspaceRootWithoutRoot(t *testing.T) { bundle := &bundle.Bundle{ Config: config.Root{ Workspace: config.Workspace{ - CurrentUser: &iam.User{ - UserName: "jane@doe.com", + CurrentUser: &config.User{ + User: &iam.User{ + UserName: "jane@doe.com", + }, }, }, }, diff --git a/bundle/config/mutator/load_git_details.go b/bundle/config/mutator/load_git_details.go index 121924c62..85e9eae7b 100644 --- a/bundle/config/mutator/load_git_details.go +++ b/bundle/config/mutator/load_git_details.go @@ -32,6 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b 
*bundle.Bundle) error { } else { b.Config.Bundle.Git.Branch = branch } + b.Config.Bundle.Git.Inferred = true } // load commit hash if undefined if b.Config.Bundle.Git.Commit == "" { diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index 34c6ff6e3..5de245ae3 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -2,8 +2,10 @@ package mutator import ( "context" + "strings" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" ) type populateCurrentUser struct{} @@ -24,6 +26,9 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error return err } - b.Config.Workspace.CurrentUser = me + b.Config.Workspace.CurrentUser = &config.User{ + ShortName: strings.Split(me.UserName, "@")[0], + User: me, + } return nil } diff --git a/bundle/config/mutator/process_environment_mode.go b/bundle/config/mutator/process_environment_mode.go index 3e1b7e819..1186b9349 100644 --- a/bundle/config/mutator/process_environment_mode.go +++ b/bundle/config/mutator/process_environment_mode.go @@ -4,18 +4,24 @@ import ( "context" "fmt" "path" + "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/ml" ) -type processEnvironmentMode struct{} +type processEnvironmentMode struct { + // getPrincipalGetByIdImpl overrides the GetPrincipalGetById implementation for testing purposes. 
+ getPrincipalGetByIdImpl func(ctx context.Context, id string) (*iam.ServicePrincipal, error) +} const developmentConcurrentRuns = 4 -func ProcessEnvironmentMode() bundle.Mutator { +func ProcessEnvironmentMode() *processEnvironmentMode { return &processEnvironmentMode{} } @@ -26,15 +32,17 @@ func (m *processEnvironmentMode) Name() string { // Mark all resources as being for 'development' purposes, i.e. // changing their their name, adding tags, and (in the future) // marking them as 'hidden' in the UI. -func processDevelopmentMode(b *bundle.Bundle) error { +func transformDevelopmentMode(b *bundle.Bundle) error { r := b.Config.Resources + prefix := "[dev " + b.Config.Workspace.CurrentUser.ShortName + "] " + for i := range r.Jobs { - r.Jobs[i].Name = "[dev] " + r.Jobs[i].Name + r.Jobs[i].Name = prefix + r.Jobs[i].Name if r.Jobs[i].Tags == nil { r.Jobs[i].Tags = make(map[string]string) } - r.Jobs[i].Tags["dev"] = "" + r.Jobs[i].Tags["dev"] = b.Config.Workspace.CurrentUser.DisplayName if r.Jobs[i].MaxConcurrentRuns == 0 { r.Jobs[i].MaxConcurrentRuns = developmentConcurrentRuns } @@ -50,13 +58,13 @@ func processDevelopmentMode(b *bundle.Bundle) error { } for i := range r.Pipelines { - r.Pipelines[i].Name = "[dev] " + r.Pipelines[i].Name + r.Pipelines[i].Name = prefix + r.Pipelines[i].Name r.Pipelines[i].Development = true // (pipelines don't yet support tags) } for i := range r.Models { - r.Models[i].Name = "[dev] " + r.Models[i].Name + r.Models[i].Name = prefix + r.Models[i].Name r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: "dev", Value: ""}) } @@ -65,20 +73,113 @@ func processDevelopmentMode(b *bundle.Bundle) error { dir := path.Dir(filepath) base := path.Base(filepath) if dir == "." 
{
-			r.Experiments[i].Name = "[dev] " + base
+			r.Experiments[i].Name = prefix + base
 		} else {
-			r.Experiments[i].Name = dir + "/[dev] " + base
+			r.Experiments[i].Name = dir + "/" + prefix + base
 		}
-		r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: ""})
+		r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: b.Config.Workspace.CurrentUser.DisplayName})
 	}
 
 	return nil
 }
 
+func validateDevelopmentMode(b *bundle.Bundle) error {
+	if !isUserSpecificDeployment(b) {
+		return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
+	}
+	return nil
+}
+
+func isUserSpecificDeployment(b *bundle.Bundle) bool {
+	username := b.Config.Workspace.CurrentUser.UserName
+	return strings.Contains(b.Config.Workspace.StatePath, username) &&
+		strings.Contains(b.Config.Workspace.ArtifactsPath, username) &&
+		strings.Contains(b.Config.Workspace.FilesPath, username)
+}
+
+func (m *processEnvironmentMode) validateProductionMode(ctx context.Context, b *bundle.Bundle) error {
+	if b.Config.Bundle.Git.Inferred {
+		// TODO: show a nicer, human-friendly error here.
+		return fmt.Errorf("environment with 'mode: production' must specify an explicit 'git' configuration")
+	}
+
+	r := b.Config.Resources
+	for i := range r.Pipelines {
+		if r.Pipelines[i].Development {
+			return fmt.Errorf("environment with 'mode: production' cannot specify a pipeline with 'development: true'")
+		}
+	}
+
+	isPrincipal, err := m.isServicePrincipalUsed(ctx, b)
+	if err != nil {
+		return err
+	}
+
+	if !isPrincipal {
+		if isUserSpecificDeployment(b) {
+			return fmt.Errorf("environment with 'mode: production' must not deploy to a location specific to the user, and should e.g. set 'root_path: /Shared/.bundle/${bundle.name}/${bundle.environment}'")
+		}
+
+		if !arePermissionsSetExplicitly(r) {
+			return fmt.Errorf("environment with 'mode: production' must set permissions and run_as for all resources (when not using service principals)")
+		}
+	}
+	return nil
+}
+
+// isServicePrincipalUsed reports whether the current user's ID resolves to a service principal; a 404 from the lookup means it does not.
+func (m *processEnvironmentMode) isServicePrincipalUsed(ctx context.Context, b *bundle.Bundle) (bool, error) {
+	getPrincipalById := m.getPrincipalGetByIdImpl
+	if getPrincipalById == nil {
+		// Only build a workspace client when no test override is installed.
+		ws := b.WorkspaceClient()
+		getPrincipalById = ws.ServicePrincipals.GetById
+	}
+
+	_, err := getPrincipalById(ctx, b.Config.Workspace.CurrentUser.Id)
+	if err != nil {
+		apiError, ok := err.(*apierr.APIError)
+		if ok && apiError.StatusCode == 404 {
+			return false, nil
+		}
+		return false, err
+	}
+	return true, nil
+}
+
+// Determines whether permissions and run_as are explicitly set for all resources.
+// We do this in a best-effort fashion; we may not actually test all resources,
+// as we expect customers to use the top-level 'permissions' and 'run_as' fields.
+// We'd rather not check for those specific fields though, as customers might
+// set specific permissions instead! 
+func arePermissionsSetExplicitly(r config.Resources) bool {
+	for i := range r.Pipelines {
+		if r.Pipelines[i].Permissions == nil {
+			return false
+		}
+	}
+
+	for i := range r.Jobs {
+		if r.Jobs[i].Permissions == nil {
+			return false
+		}
+		if r.Jobs[i].RunAs == nil {
+			return false
+		}
+	}
+	return true
+}
+
 func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
 	switch b.Config.Bundle.Mode {
 	case config.Development:
-		return processDevelopmentMode(b)
+		err := validateDevelopmentMode(b)
+		if err != nil {
+			return err
+		}
+		return transformDevelopmentMode(b)
+	case config.Production:
+		return m.validateProductionMode(ctx, b)
 	case "":
 		// No action
 	default:
diff --git a/bundle/config/mutator/process_environment_mode_test.go b/bundle/config/mutator/process_environment_mode_test.go
index 5342de212..6f5590d65 100644
--- a/bundle/config/mutator/process_environment_mode_test.go
+++ b/bundle/config/mutator/process_environment_mode_test.go
@@ -1,13 +1,16 @@
-package mutator_test
+package mutator
 
 import (
 	"context"
+	"reflect"
+	"strings"
 	"testing"
 
 	"github.com/databricks/cli/bundle"
 	"github.com/databricks/cli/bundle/config"
-	"github.com/databricks/cli/bundle/config/mutator"
 	"github.com/databricks/cli/bundle/config/resources"
+	"github.com/databricks/databricks-sdk-go/apierr"
+	"github.com/databricks/databricks-sdk-go/service/iam"
 	"github.com/databricks/databricks-sdk-go/service/jobs"
 	"github.com/databricks/databricks-sdk-go/service/ml"
 	"github.com/databricks/databricks-sdk-go/service/pipelines"
@@ -15,11 +18,23 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func TestProcessEnvironmentModeApplyDebug(t *testing.T) {
-	bundle := &bundle.Bundle{
+func mockBundle(mode config.Mode) *bundle.Bundle {
+	return &bundle.Bundle{
 		Config: config.Root{
 			Bundle: config.Bundle{
-				Mode: config.Development,
+				Mode: mode,
+			},
+			Workspace: config.Workspace{
+				CurrentUser: &config.User{
+					ShortName: "Lennart",
+					User: &iam.User{
+						UserName: 
"lennart@company.com", + Id: "1", + }, + }, + StatePath: "/Users/lennart@company.com/.bundle/x/y/state", + ArtifactsPath: "/Users/lennart@company.com/.bundle/x/y/artifacts", + FilesPath: "/Users/lennart@company.com/.bundle/x/y/files", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -38,40 +53,120 @@ func TestProcessEnvironmentModeApplyDebug(t *testing.T) { }, }, } +} - m := mutator.ProcessEnvironmentMode() +func TestProcessEnvironmentModeDevelopment(t *testing.T) { + bundle := mockBundle(config.Development) + + m := ProcessEnvironmentMode() err := m.Apply(context.Background(), bundle) require.NoError(t, err) - assert.Equal(t, "[dev] job1", bundle.Config.Resources.Jobs["job1"].Name) - assert.Equal(t, "[dev] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) - assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name) - assert.Equal(t, "[dev] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name) - assert.Equal(t, "[dev] model1", bundle.Config.Resources.Models["model1"].Name) + assert.Equal(t, "[dev Lennart] job1", bundle.Config.Resources.Jobs["job1"].Name) + assert.Equal(t, "[dev Lennart] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) + assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev Lennart] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name) + assert.Equal(t, "[dev Lennart] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name) + assert.Equal(t, "[dev Lennart] model1", bundle.Config.Resources.Models["model1"].Name) assert.Equal(t, "dev", bundle.Config.Resources.Experiments["experiment1"].Experiment.Tags[0].Key) assert.True(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) } -func TestProcessEnvironmentModeApplyDefault(t *testing.T) { - bundle := &bundle.Bundle{ - Config: config.Root{ - Bundle: config.Bundle{ - Mode: "", - }, - Resources: config.Resources{ - 
Jobs: map[string]*resources.Job{ - "job1": {JobSettings: &jobs.JobSettings{Name: "job1"}}, - }, - Pipelines: map[string]*resources.Pipeline{ - "pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}}, - }, - }, - }, - } +func TestProcessEnvironmentModeDefault(t *testing.T) { + bundle := mockBundle("") - m := mutator.ProcessEnvironmentMode() + m := ProcessEnvironmentMode() err := m.Apply(context.Background(), bundle) require.NoError(t, err) assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name) assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) } + +func TestProcessEnvironmentModeProduction(t *testing.T) { + bundle := mockBundle(config.Production) + bundle.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state" + bundle.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts" + bundle.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files" + + m := ProcessEnvironmentMode() + m.getPrincipalGetByIdImpl = func(ctx context.Context, id string) (*iam.ServicePrincipal, error) { + return nil, &apierr.APIError{StatusCode: 404} + } + + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name) + assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) + assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) +} + +func TestProcessEnvironmentModeProductionFails(t *testing.T) { + bundle := mockBundle(config.Production) + + m := ProcessEnvironmentMode() + m.getPrincipalGetByIdImpl = func(ctx context.Context, id string) (*iam.ServicePrincipal, error) { + return nil, &apierr.APIError{StatusCode: 404} + } + + err := m.Apply(context.Background(), bundle) + require.Error(t, err) +} + +func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) { + bundle := 
mockBundle(config.Production) + + m := ProcessEnvironmentMode() + m.getPrincipalGetByIdImpl = func(ctx context.Context, id string) (*iam.ServicePrincipal, error) { + return nil, nil + } + + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) +} + +// Make sure that we have test coverage for all resource types +func TestAllResourcesMocked(t *testing.T) { + bundle := mockBundle(config.Development) + resources := reflect.ValueOf(bundle.Config.Resources) + + for i := 0; i < resources.NumField(); i++ { + field := resources.Field(i) + if field.Kind() == reflect.Map { + assert.True( + t, + !field.IsNil() && field.Len() > 0, + "process_environment_mode should support '%s' (please add it to process_environment_mode.go and extend the test suite)", + resources.Type().Field(i).Name, + ) + } + } +} + +// Make sure that we at least rename all resources +func TestAllResourcesRenamed(t *testing.T) { + bundle := mockBundle(config.Development) + resources := reflect.ValueOf(bundle.Config.Resources) + + m := ProcessEnvironmentMode() + err := m.Apply(context.Background(), bundle) + require.NoError(t, err) + + for i := 0; i < resources.NumField(); i++ { + field := resources.Field(i) + + if field.Kind() == reflect.Map { + for _, key := range field.MapKeys() { + resource := field.MapIndex(key) + nameField := resource.Elem().FieldByName("Name") + if nameField.IsValid() && nameField.Kind() == reflect.String { + assert.True( + t, + strings.Contains(nameField.String(), "dev"), + "process_environment_mode should rename '%s' in '%s'", + key, + resources.Type().Field(i).Name, + ) + } + } + } + } +} diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 8a1205b3b..a13786d26 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -37,7 +37,7 @@ type Workspace struct { // CurrentUser holds the current user. // This is set after configuration initialization. 
- CurrentUser *iam.User `json:"current_user,omitempty" bundle:"readonly"` + CurrentUser *User `json:"current_user,omitempty" bundle:"readonly"` // Remote workspace base path for deployment state, for artifacts, as synchronization target. // This defaults to "~/.bundle/${bundle.name}/${bundle.environment}" where "~" expands to @@ -57,6 +57,13 @@ type Workspace struct { StatePath string `json:"state_path,omitempty"` } +type User struct { + // A short name for the user, based on the user's UserName. + ShortName string `json:"short_name,omitempty" bundle:"readonly"` + + *iam.User +} + func (w *Workspace) Client() (*databricks.WorkspaceClient, error) { cfg := databricks.Config{ // Generic diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index 77c64e529..84d79dc81 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -15,9 +15,10 @@ func getSync(ctx context.Context, b *bundle.Bundle) (*sync.Sync, error) { } opts := sync.SyncOptions{ - LocalPath: b.Config.Path, - RemotePath: b.Config.Workspace.FilesPath, - Full: false, + LocalPath: b.Config.Path, + RemotePath: b.Config.Workspace.FilesPath, + Full: false, + CurrentUser: b.Config.Workspace.CurrentUser.User, SnapshotBasePath: cacheDir, WorkspaceClient: b.WorkspaceClient(),