Extend deployment mode support (#577)

## Changes

This adds `mode: production` option. This mode doesn't do any
transformations but verifies that an environment is configured correctly
for production:

```
environments:
  prod:
    mode: production

    # paths should not be scoped to a user (unless a service principal is used)
    root_path: /Shared/non_user_path/...

    # run_as and permissions should be set at the resource level (or at the top level when that is implemented)
    run_as:
      user_name: Alice
    permissions:
    - level: CAN_MANAGE
      user_name: Alice
```

Additionally, this extends the existing `mode: development` option,
* now prefixing deployed assets with `[dev your.user]` instead of just
`[dev`]
* validating that development deployments _are_ scoped to a user

## Related

https://github.com/databricks/cli/pull/578/files (in draft)

## Tests

Manual testing to validate the experience, error messages, and
functionality with all resource types. Automated unit tests.

---------

Co-authored-by: Fabian Jakobs <fabian.jakobs@databricks.com>
This commit is contained in:
Lennart Kats (databricks) 2023-07-30 09:19:49 +02:00 committed by GitHub
parent 5deca8d7d8
commit d55652be07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 340 additions and 61 deletions

View File

@ -32,7 +32,13 @@ type Environment struct {
}
const (
// Right now, we just have a default / "" mode and a "development" mode.
// Additional modes are expected to come for pull-requests and production.
// Development mode: deployments done purely for running things in development.
// Any deployed resources will be marked as "dev" and might be hidden or cleaned up.
Development Mode = "development"
// Production mode: deployments done for production purposes.
// Any deployed resources will not be changed but this mode will enable
// various strictness checks to make sure that a deployment is correctly setup
// for production purposes.
Production Mode = "production"
)

View File

@ -16,8 +16,10 @@ func TestExpandWorkspaceRoot(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
RootPath: "~/foo",
},
@ -32,8 +34,10 @@ func TestExpandWorkspaceRootDoesNothing(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
RootPath: "/Users/charly@doe.com/foo",
},
@ -48,8 +52,10 @@ func TestExpandWorkspaceRootWithoutRoot(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
},
},

View File

@ -2,8 +2,11 @@ package mutator
import (
"context"
"strings"
"unicode"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
)
type populateCurrentUser struct{}
@ -24,6 +27,22 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error
return err
}
b.Config.Workspace.CurrentUser = me
b.Config.Workspace.CurrentUser = &config.User{
ShortName: getShortUserName(me.UserName),
User: me,
}
return nil
}
// Get a short-form username, based on the user's primary email address.
// We leave the full range of unicode letters in tact, but remove all "special" characters,
// including dots, which are not supported in e.g. experiment names.
func getShortUserName(emailAddress string) string {
r := []rune(strings.Split(emailAddress, "@")[0])
for i := 0; i < len(r); i++ {
if !unicode.IsLetter(r[i]) {
r[i] = '_'
}
}
return string(r)
}

View File

@ -1,3 +1,40 @@
package mutator
// We need to implement workspace client mocking to implement this test.
import "testing"
func TestPopulateCurrentUser(t *testing.T) {
// We need to implement workspace client mocking to implement this test.
}
func TestGetShortUserName(t *testing.T) {
tests := []struct {
name string
email string
expected string
}{
{
name: "test alphanumeric characters",
email: "test.user@example.com",
expected: "test_user",
},
{
name: "test unicode characters",
email: "tést.üser@example.com",
expected: "tést_üser",
},
{
name: "test special characters",
email: "test$.user@example.com",
expected: "test__user",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := getShortUserName(tt.email)
if result != tt.expected {
t.Errorf("getShortUserName(%q) = %q; expected %q", tt.email, result, tt.expected)
}
})
}
}

View File

@ -4,9 +4,11 @@ import (
"context"
"fmt"
"path"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)
@ -26,15 +28,17 @@ func (m *processEnvironmentMode) Name() string {
// Mark all resources as being for 'development' purposes, i.e.
// changing their their name, adding tags, and (in the future)
// marking them as 'hidden' in the UI.
func processDevelopmentMode(b *bundle.Bundle) error {
func transformDevelopmentMode(b *bundle.Bundle) error {
r := b.Config.Resources
prefix := "[dev " + b.Config.Workspace.CurrentUser.ShortName + "] "
for i := range r.Jobs {
r.Jobs[i].Name = "[dev] " + r.Jobs[i].Name
r.Jobs[i].Name = prefix + r.Jobs[i].Name
if r.Jobs[i].Tags == nil {
r.Jobs[i].Tags = make(map[string]string)
}
r.Jobs[i].Tags["dev"] = ""
r.Jobs[i].Tags["dev"] = b.Config.Workspace.CurrentUser.DisplayName
if r.Jobs[i].MaxConcurrentRuns == 0 {
r.Jobs[i].MaxConcurrentRuns = developmentConcurrentRuns
}
@ -50,13 +54,13 @@ func processDevelopmentMode(b *bundle.Bundle) error {
}
for i := range r.Pipelines {
r.Pipelines[i].Name = "[dev] " + r.Pipelines[i].Name
r.Pipelines[i].Name = prefix + r.Pipelines[i].Name
r.Pipelines[i].Development = true
// (pipelines don't yet support tags)
}
for i := range r.Models {
r.Models[i].Name = "[dev] " + r.Models[i].Name
r.Models[i].Name = prefix + r.Models[i].Name
r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: "dev", Value: ""})
}
@ -65,20 +69,111 @@ func processDevelopmentMode(b *bundle.Bundle) error {
dir := path.Dir(filepath)
base := path.Base(filepath)
if dir == "." {
r.Experiments[i].Name = "[dev] " + base
r.Experiments[i].Name = prefix + base
} else {
r.Experiments[i].Name = dir + "/[dev] " + base
r.Experiments[i].Name = dir + "/" + prefix + base
}
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: ""})
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: b.Config.Workspace.CurrentUser.DisplayName})
}
return nil
}
func validateDevelopmentMode(b *bundle.Bundle) error {
if path := findIncorrectPath(b, config.Development); path != "" {
return fmt.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path)
}
return nil
}
func findIncorrectPath(b *bundle.Bundle, mode config.Mode) string {
username := b.Config.Workspace.CurrentUser.UserName
containsExpected := true
if mode == config.Production {
containsExpected = false
}
if strings.Contains(b.Config.Workspace.RootPath, username) != containsExpected && b.Config.Workspace.RootPath != "" {
return "root_path"
}
if strings.Contains(b.Config.Workspace.StatePath, username) != containsExpected {
return "state_path"
}
if strings.Contains(b.Config.Workspace.FilesPath, username) != containsExpected {
return "files_path"
}
if strings.Contains(b.Config.Workspace.ArtifactsPath, username) != containsExpected {
return "artifacts_path"
}
return ""
}
func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) error {
r := b.Config.Resources
for i := range r.Pipelines {
if r.Pipelines[i].Development {
return fmt.Errorf("environment with 'mode: production' cannot specify a pipeline with 'development: true'")
}
}
if !isPrincipalUsed {
if path := findIncorrectPath(b, config.Production); path != "" {
message := "%s must not contain the current username when using 'mode: production'"
if path == "root_path" {
return fmt.Errorf(message+"\n tip: set workspace.root_path to a shared path such as /Shared/.bundle/${bundle.name}/${bundle.environment}", path)
} else {
return fmt.Errorf(message, path)
}
}
if !isRunAsSet(r) {
return fmt.Errorf("'run_as' must be set for all jobs when using 'mode: production'")
}
}
return nil
}
// Determines whether a service principal identity is used to run the CLI.
func isServicePrincipalUsed(ctx context.Context, b *bundle.Bundle) (bool, error) {
ws := b.WorkspaceClient()
// Check if a principal with the current user's ID exists.
// We need to use the ListAll method since Get is only usable by admins.
matches, err := ws.ServicePrincipals.ListAll(ctx, iam.ListServicePrincipalsRequest{
Filter: "id eq " + b.Config.Workspace.CurrentUser.Id,
})
if err != nil {
return false, err
}
return len(matches) > 0, nil
}
// Determines whether run_as is explicitly set for all resources.
// We do this in a best-effort fashion rather than check the top-level
// 'run_as' field because the latter is not required to be set.
func isRunAsSet(r config.Resources) bool {
for i := range r.Jobs {
if r.Jobs[i].RunAs == nil {
return false
}
}
return true
}
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
switch b.Config.Bundle.Mode {
case config.Development:
return processDevelopmentMode(b)
err := validateDevelopmentMode(b)
if err != nil {
return err
}
return transformDevelopmentMode(b)
case config.Production:
isPrincipal, err := isServicePrincipalUsed(ctx, b)
if err != nil {
return err
}
return validateProductionMode(ctx, b, isPrincipal)
case "":
// No action
default:

View File

@ -1,13 +1,15 @@
package mutator_test
package mutator
import (
"context"
"reflect"
"strings"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
"github.com/databricks/databricks-sdk-go/service/pipelines"
@ -15,11 +17,23 @@ import (
"github.com/stretchr/testify/require"
)
func TestProcessEnvironmentModeApplyDebug(t *testing.T) {
bundle := &bundle.Bundle{
func mockBundle(mode config.Mode) *bundle.Bundle {
return &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Mode: config.Development,
Mode: mode,
},
Workspace: config.Workspace{
CurrentUser: &config.User{
ShortName: "lennart",
User: &iam.User{
UserName: "lennart@company.com",
Id: "1",
},
},
StatePath: "/Users/lennart@company.com/.bundle/x/y/state",
ArtifactsPath: "/Users/lennart@company.com/.bundle/x/y/artifacts",
FilesPath: "/Users/lennart@company.com/.bundle/x/y/files",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
@ -38,40 +52,124 @@ func TestProcessEnvironmentModeApplyDebug(t *testing.T) {
},
},
}
}
m := mutator.ProcessEnvironmentMode()
func TestProcessEnvironmentModeDevelopment(t *testing.T) {
bundle := mockBundle(config.Development)
m := ProcessEnvironmentMode()
err := m.Apply(context.Background(), bundle)
require.NoError(t, err)
assert.Equal(t, "[dev] job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "[dev] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name)
assert.Equal(t, "[dev] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name)
assert.Equal(t, "[dev] model1", bundle.Config.Resources.Models["model1"].Name)
assert.Equal(t, "[dev lennart] job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "[dev lennart] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev lennart] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name)
assert.Equal(t, "[dev lennart] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name)
assert.Equal(t, "[dev lennart] model1", bundle.Config.Resources.Models["model1"].Name)
assert.Equal(t, "dev", bundle.Config.Resources.Experiments["experiment1"].Experiment.Tags[0].Key)
assert.True(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
func TestProcessEnvironmentModeApplyDefault(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Mode: "",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {JobSettings: &jobs.JobSettings{Name: "job1"}},
},
Pipelines: map[string]*resources.Pipeline{
"pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}},
},
},
},
}
func TestProcessEnvironmentModeDefault(t *testing.T) {
bundle := mockBundle("")
m := mutator.ProcessEnvironmentMode()
m := ProcessEnvironmentMode()
err := m.Apply(context.Background(), bundle)
require.NoError(t, err)
assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
func TestProcessEnvironmentModeProduction(t *testing.T) {
bundle := mockBundle(config.Production)
err := validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "state_path")
bundle.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state"
bundle.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts"
bundle.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files"
err = validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "production")
permissions := []resources.Permission{
{
Level: "CAN_MANAGE",
UserName: "user@company.com",
},
}
bundle.Config.Resources.Jobs["job1"].Permissions = permissions
bundle.Config.Resources.Jobs["job1"].RunAs = &jobs.JobRunAs{UserName: "user@company.com"}
bundle.Config.Resources.Pipelines["pipeline1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment2"].Permissions = permissions
bundle.Config.Resources.Models["model1"].Permissions = permissions
err = validateProductionMode(context.Background(), bundle, false)
require.NoError(t, err)
assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) {
bundle := mockBundle(config.Production)
// Our environment has all kinds of problems when not using service principals ...
err := validateProductionMode(context.Background(), bundle, false)
require.Error(t, err)
// ... but we're much less strict when a principal is used
err = validateProductionMode(context.Background(), bundle, true)
require.NoError(t, err)
}
// Make sure that we have test coverage for all resource types
func TestAllResourcesMocked(t *testing.T) {
bundle := mockBundle(config.Development)
resources := reflect.ValueOf(bundle.Config.Resources)
for i := 0; i < resources.NumField(); i++ {
field := resources.Field(i)
if field.Kind() == reflect.Map {
assert.True(
t,
!field.IsNil() && field.Len() > 0,
"process_environment_mode should support '%s' (please add it to process_environment_mode.go and extend the test suite)",
resources.Type().Field(i).Name,
)
}
}
}
// Make sure that we at least rename all resources
func TestAllResourcesRenamed(t *testing.T) {
bundle := mockBundle(config.Development)
resources := reflect.ValueOf(bundle.Config.Resources)
m := ProcessEnvironmentMode()
err := m.Apply(context.Background(), bundle)
require.NoError(t, err)
for i := 0; i < resources.NumField(); i++ {
field := resources.Field(i)
if field.Kind() == reflect.Map {
for _, key := range field.MapKeys() {
resource := field.MapIndex(key)
nameField := resource.Elem().FieldByName("Name")
if nameField.IsValid() && nameField.Kind() == reflect.String {
assert.True(
t,
strings.Contains(nameField.String(), "dev"),
"process_environment_mode should rename '%s' in '%s'",
key,
resources.Type().Field(i).Name,
)
}
}
}
}
}

View File

@ -42,7 +42,7 @@ type Workspace struct {
// CurrentUser holds the current user.
// This is set after configuration initialization.
CurrentUser *iam.User `json:"current_user,omitempty" bundle:"readonly"`
CurrentUser *User `json:"current_user,omitempty" bundle:"readonly"`
// Remote workspace base path for deployment state, for artifacts, as synchronization target.
// This defaults to "~/.bundle/${bundle.name}/${bundle.environment}" where "~" expands to
@ -62,6 +62,13 @@ type Workspace struct {
StatePath string `json:"state_path,omitempty"`
}
type User struct {
// A short name for the user, based on the user's UserName.
ShortName string `json:"short_name,omitempty" bundle:"readonly"`
*iam.User
}
func (w *Workspace) Client() (*databricks.WorkspaceClient, error) {
cfg := databricks.Config{
// Generic

View File

@ -15,9 +15,10 @@ func getSync(ctx context.Context, b *bundle.Bundle) (*sync.Sync, error) {
}
opts := sync.SyncOptions{
LocalPath: b.Config.Path,
RemotePath: b.Config.Workspace.FilesPath,
Full: false,
LocalPath: b.Config.Path,
RemotePath: b.Config.Workspace.FilesPath,
Full: false,
CurrentUser: b.Config.Workspace.CurrentUser.User,
SnapshotBasePath: cacheDir,
WorkspaceClient: b.WorkspaceClient(),

View File

@ -23,6 +23,7 @@ environments:
development: false
production:
mode: production
resources:
pipelines:
nyc_taxi_pipeline:

View File

@ -509,12 +509,12 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
// Hypothetical repo path doesn't exist.
nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
// Paths nested under a hypothetical repo path should yield the same error.
nestedPath := path.Join(nonExistingRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
}
@ -526,12 +526,12 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
_, remoteRepoPath := setupRepo(t, wsc, ctx)
// Repo itself is usable.
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath)
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
assert.NoError(t, err)
// Path nested under repo path is usable.
nestedPath := path.Join(remoteRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.NoError(t, err)
// Verify that the directory has been created.
@ -549,7 +549,7 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
require.NoError(t, err)
remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
assert.NoError(t, err)
// Clean up directory after test.

View File

@ -24,10 +24,16 @@ func repoPathForPath(me *iam.User, remotePath string) string {
// EnsureRemotePathIsUsable checks if the specified path is nested under
// expected base paths and if it is a directory or repository.
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error {
me, err := wsc.CurrentUser.Me(ctx)
if err != nil {
return err
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) error {
var err error
// TODO: we should cache CurrentUser.Me at the SDK level
// for now we let clients pass in any existing user they might already have
if me == nil {
me, err = wsc.CurrentUser.Me(ctx)
if err != nil {
return err
}
}
// Ensure that the remote path exists.

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/iam"
)
type SyncOptions struct {
@ -23,6 +24,8 @@ type SyncOptions struct {
WorkspaceClient *databricks.WorkspaceClient
CurrentUser *iam.User
Host string
}
@ -50,7 +53,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
}
// Verify that the remote path we're about to synchronize to is valid and allowed.
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath)
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
if err != nil {
return nil, err
}