This commit is contained in:
Lennart Kats 2023-07-10 09:12:50 +02:00
parent 77fcc92e65
commit 368796ba12
9 changed files with 277 additions and 52 deletions

View File

@ -32,7 +32,13 @@ type Environment struct {
}
const (
	// The default mode is the empty string (""). The modes below opt in to
	// additional behavior.

	// Development mode: deployments done purely for running things in development.
	// Any deployed resources will be marked as "dev" and might be hidden or cleaned up.
	Development Mode = "development"

	// Production mode: deployments done for production purposes.
	// Any deployed resources will not be changed, but this mode will enable
	// various strictness checks to make sure that a deployment is correctly set up
	// for production purposes.
	Production Mode = "production"
)

View File

@ -4,4 +4,7 @@ type Git struct {
Branch string `json:"branch,omitempty"`
OriginURL string `json:"origin_url,omitempty"`
Commit string `json:"commit,omitempty" bundle:"readonly"`
// Inferred is set to true if the Git details were inferred and weren't set explicitly
Inferred bool `json:"-" bundle:"readonly"`
}

View File

@ -16,8 +16,10 @@ func TestExpandWorkspaceRoot(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
RootPath: "~/foo",
},
@ -32,8 +34,10 @@ func TestExpandWorkspaceRootDoesNothing(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
RootPath: "/Users/charly@doe.com/foo",
},
@ -48,8 +52,10 @@ func TestExpandWorkspaceRootWithoutRoot(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Workspace: config.Workspace{
CurrentUser: &iam.User{
UserName: "jane@doe.com",
CurrentUser: &config.User{
User: &iam.User{
UserName: "jane@doe.com",
},
},
},
},

View File

@ -32,6 +32,7 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) error {
} else {
b.Config.Bundle.Git.Branch = branch
}
b.Config.Bundle.Git.Inferred = true
}
// load commit hash if undefined
if b.Config.Bundle.Git.Commit == "" {

View File

@ -2,8 +2,10 @@ package mutator
import (
"context"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
)
type populateCurrentUser struct{}
@ -24,6 +26,9 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error
return err
}
b.Config.Workspace.CurrentUser = me
b.Config.Workspace.CurrentUser = &config.User{
ShortName: strings.Split(me.UserName, "@")[0],
User: me,
}
return nil
}

View File

@ -4,18 +4,24 @@ import (
"context"
"fmt"
"path"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)
type processEnvironmentMode struct{}
// processEnvironmentMode is the mutator that applies the behavior associated
// with the bundle's configured mode (development, production, or default).
type processEnvironmentMode struct {
	// getPrincipalGetByIdImpl overrides the service principal lookup for
	// testing purposes. When nil, the workspace client's
	// ServicePrincipals.GetById is used instead.
	getPrincipalGetByIdImpl func(ctx context.Context, id string) (*iam.ServicePrincipal, error)
}
// developmentConcurrentRuns is the MaxConcurrentRuns value given to jobs in
// development mode when the job does not set one itself (i.e. it is 0).
const developmentConcurrentRuns = 4
func ProcessEnvironmentMode() bundle.Mutator {
// ProcessEnvironmentMode returns a mutator that validates and/or transforms
// the bundle according to its configured mode. The concrete type is returned
// so that tests in this package can install a service principal lookup override.
func ProcessEnvironmentMode() *processEnvironmentMode {
	return &processEnvironmentMode{}
}
@ -26,15 +32,17 @@ func (m *processEnvironmentMode) Name() string {
// Mark all resources as being for 'development' purposes, i.e.
// changing their name, adding tags, and (in the future)
// marking them as 'hidden' in the UI.
func processDevelopmentMode(b *bundle.Bundle) error {
func transformDevelopmentMode(b *bundle.Bundle) error {
r := b.Config.Resources
prefix := "[dev " + b.Config.Workspace.CurrentUser.ShortName + "] "
for i := range r.Jobs {
r.Jobs[i].Name = "[dev] " + r.Jobs[i].Name
r.Jobs[i].Name = prefix + r.Jobs[i].Name
if r.Jobs[i].Tags == nil {
r.Jobs[i].Tags = make(map[string]string)
}
r.Jobs[i].Tags["dev"] = ""
r.Jobs[i].Tags["dev"] = b.Config.Workspace.CurrentUser.DisplayName
if r.Jobs[i].MaxConcurrentRuns == 0 {
r.Jobs[i].MaxConcurrentRuns = developmentConcurrentRuns
}
@ -50,13 +58,13 @@ func processDevelopmentMode(b *bundle.Bundle) error {
}
for i := range r.Pipelines {
r.Pipelines[i].Name = "[dev] " + r.Pipelines[i].Name
r.Pipelines[i].Name = prefix + r.Pipelines[i].Name
r.Pipelines[i].Development = true
// (pipelines don't yet support tags)
}
for i := range r.Models {
r.Models[i].Name = "[dev] " + r.Models[i].Name
r.Models[i].Name = prefix + r.Models[i].Name
r.Models[i].Tags = append(r.Models[i].Tags, ml.ModelTag{Key: "dev", Value: ""})
}
@ -65,20 +73,113 @@ func processDevelopmentMode(b *bundle.Bundle) error {
dir := path.Dir(filepath)
base := path.Base(filepath)
if dir == "." {
r.Experiments[i].Name = "[dev] " + base
r.Experiments[i].Name = prefix + base
} else {
r.Experiments[i].Name = dir + "/[dev] " + base
r.Experiments[i].Name = dir + "/" + prefix + base
}
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: ""})
r.Experiments[i].Tags = append(r.Experiments[i].Tags, ml.ExperimentTag{Key: "dev", Value: b.Config.Workspace.CurrentUser.DisplayName})
}
return nil
}
func validateDevelopmentMode(b *bundle.Bundle) error {
if isUserSpecificDeployment(b) {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
}
return nil
}
func isUserSpecificDeployment(b *bundle.Bundle) bool {
username := b.Config.Workspace.CurrentUser.UserName
return !strings.Contains(b.Config.Workspace.StatePath, username) ||
!strings.Contains(b.Config.Workspace.ArtifactsPath, username) ||
!strings.Contains(b.Config.Workspace.FilesPath, username)
}
func (m *processEnvironmentMode) validateProductionMode(ctx context.Context, b *bundle.Bundle) error {
if b.Config.Bundle.Git.Inferred {
TODO: show a nice human error here? :(
return fmt.Errorf("environment with 'mode: production' must specify an explicit 'git' configuration")
}
r := b.Config.Resources
for i := range r.Pipelines {
if r.Pipelines[i].Development {
return fmt.Errorf("environment with 'mode: production' cannot specify a pipeline with 'development: true'")
}
}
isPrincipal, err := m.isServicePrincipalUsed(ctx, b)
if err != nil {
return err
}
if !isPrincipal {
if isUserSpecificDeployment(b) {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
}
if !arePermissionsSetExplicitly(r) {
return fmt.Errorf("environment with 'mode: production' must set permissions and run_as for all resources (when not using service principals)")
}
}
return nil
}
// isServicePrincipalUsed determines whether a service principal identity is
// used to run the CLI.
//
// It looks up the current user's ID as a service principal:
//   - a successful lookup means the identity is a service principal (true);
//   - an API error with status 404 means it is not (false, nil);
//   - any other error is returned to the caller.
func (m *processEnvironmentMode) isServicePrincipalUsed(ctx context.Context, b *bundle.Bundle) (bool, error) {
	getPrincipalById := m.getPrincipalGetByIdImpl
	if getPrincipalById == nil {
		// The workspace client is only needed when no override is installed.
		getPrincipalById = b.WorkspaceClient().ServicePrincipals.GetById
	}

	_, err := getPrincipalById(ctx, b.Config.Workspace.CurrentUser.Id)
	if err == nil {
		return true, nil
	}
	if apiError, ok := err.(*apierr.APIError); ok && apiError.StatusCode == 404 {
		return false, nil
	}
	return false, err
}
// arePermissionsSetExplicitly determines whether permissions and run_as are
// explicitly set for all resources.
//
// We do this in a best-effort fashion; we may not actually test all resources,
// as we expect customers to use the top-level 'permissions' and 'run_as' fields.
// We'd rather not check for those specific fields though, as customers might
// set specific permissions instead!
//
// It returns false as soon as a pipeline without permissions, or a job without
// permissions or run_as, is found; otherwise it returns true.
func arePermissionsSetExplicitly(r config.Resources) bool {
	for _, pipeline := range r.Pipelines {
		if pipeline.Permissions == nil {
			return false
		}
	}
	for _, job := range r.Jobs {
		if job.Permissions == nil || job.RunAs == nil {
			return false
		}
	}
	return true
}
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
switch b.Config.Bundle.Mode {
case config.Development:
return processDevelopmentMode(b)
err := validateDevelopmentMode(b)
if err != nil {
return err
}
return transformDevelopmentMode(b)
case config.Production:
return m.validateProductionMode(ctx, b)
case "":
// No action
default:

View File

@ -1,13 +1,16 @@
package mutator_test
package mutator
import (
"context"
"reflect"
"strings"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
"github.com/databricks/databricks-sdk-go/service/pipelines"
@ -15,11 +18,23 @@ import (
"github.com/stretchr/testify/require"
)
func TestProcessEnvironmentModeApplyDebug(t *testing.T) {
bundle := &bundle.Bundle{
func mockBundle(mode config.Mode) *bundle.Bundle {
return &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Mode: config.Development,
Mode: mode,
},
Workspace: config.Workspace{
CurrentUser: &config.User{
ShortName: "Lennart",
User: &iam.User{
UserName: "lennart@company.com",
Id: "1",
},
},
StatePath: "/Users/lennart@company.com/.bundle/x/y/state",
ArtifactsPath: "/Users/lennart@company.com/.bundle/x/y/artifacts",
FilesPath: "/Users/lennart@company.com/.bundle/x/y/files",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
@ -38,40 +53,120 @@ func TestProcessEnvironmentModeApplyDebug(t *testing.T) {
},
},
}
}
m := mutator.ProcessEnvironmentMode()
func TestProcessEnvironmentModeDevelopment(t *testing.T) {
bundle := mockBundle(config.Development)
m := ProcessEnvironmentMode()
err := m.Apply(context.Background(), bundle)
require.NoError(t, err)
assert.Equal(t, "[dev] job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "[dev] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name)
assert.Equal(t, "[dev] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name)
assert.Equal(t, "[dev] model1", bundle.Config.Resources.Models["model1"].Name)
assert.Equal(t, "[dev Lennart] job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "[dev Lennart] pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.Equal(t, "/Users/lennart.kats@databricks.com/[dev Lennart] experiment1", bundle.Config.Resources.Experiments["experiment1"].Name)
assert.Equal(t, "[dev Lennart] experiment2", bundle.Config.Resources.Experiments["experiment2"].Name)
assert.Equal(t, "[dev Lennart] model1", bundle.Config.Resources.Models["model1"].Name)
assert.Equal(t, "dev", bundle.Config.Resources.Experiments["experiment1"].Experiment.Tags[0].Key)
assert.True(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
func TestProcessEnvironmentModeApplyDefault(t *testing.T) {
bundle := &bundle.Bundle{
Config: config.Root{
Bundle: config.Bundle{
Mode: "",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {JobSettings: &jobs.JobSettings{Name: "job1"}},
},
Pipelines: map[string]*resources.Pipeline{
"pipeline1": {PipelineSpec: &pipelines.PipelineSpec{Name: "pipeline1"}},
},
},
},
}
func TestProcessEnvironmentModeDefault(t *testing.T) {
bundle := mockBundle("")
m := mutator.ProcessEnvironmentMode()
m := ProcessEnvironmentMode()
err := m.Apply(context.Background(), bundle)
require.NoError(t, err)
assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
// TestProcessEnvironmentModeProduction applies production mode to a bundle
// deployed under /Shared by a regular user (the principal lookup reports 404)
// and expects the resources to be left untouched.
func TestProcessEnvironmentModeProduction(t *testing.T) {
	b := mockBundle(config.Production)
	b.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state"
	b.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts"
	b.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files"

	// Pretend the current user is not a service principal.
	notFound := func(ctx context.Context, id string) (*iam.ServicePrincipal, error) {
		return nil, &apierr.APIError{StatusCode: 404}
	}
	mut := ProcessEnvironmentMode()
	mut.getPrincipalGetByIdImpl = notFound

	require.NoError(t, mut.Apply(context.Background(), b))

	res := b.Config.Resources
	assert.Equal(t, "job1", res.Jobs["job1"].Name)
	assert.Equal(t, "pipeline1", res.Pipelines["pipeline1"].Name)
	assert.False(t, res.Pipelines["pipeline1"].PipelineSpec.Development)
}
// TestProcessEnvironmentModeProductionFails applies production mode to the
// unmodified mock bundle as a regular user (the principal lookup reports 404)
// and expects validation to fail.
func TestProcessEnvironmentModeProductionFails(t *testing.T) {
	b := mockBundle(config.Production)

	// Pretend the current user is not a service principal.
	notFound := func(ctx context.Context, id string) (*iam.ServicePrincipal, error) {
		return nil, &apierr.APIError{StatusCode: 404}
	}
	mut := ProcessEnvironmentMode()
	mut.getPrincipalGetByIdImpl = notFound

	require.Error(t, mut.Apply(context.Background(), b))
}
// TestProcessEnvironmentModeProductionOkForPrincipal applies production mode
// to the unmodified mock bundle while the principal lookup succeeds, and
// expects validation to pass.
func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) {
	b := mockBundle(config.Production)

	// A lookup without an error stands in for "the identity is a service principal".
	found := func(ctx context.Context, id string) (*iam.ServicePrincipal, error) {
		return nil, nil
	}
	mut := ProcessEnvironmentMode()
	mut.getPrincipalGetByIdImpl = found

	require.NoError(t, mut.Apply(context.Background(), b))
}
// TestAllResourcesMocked guards test coverage: every map-typed field of the
// resources configuration must be non-nil and non-empty in the mock bundle.
func TestAllResourcesMocked(t *testing.T) {
	b := mockBundle(config.Development)
	rv := reflect.ValueOf(b.Config.Resources)
	rt := rv.Type()

	for idx := 0; idx < rv.NumField(); idx++ {
		f := rv.Field(idx)
		if f.Kind() != reflect.Map {
			continue
		}
		populated := !f.IsNil() && f.Len() > 0
		assert.True(
			t,
			populated,
			"process_environment_mode should support '%s' (please add it to process_environment_mode.go and extend the test suite)",
			rt.Field(idx).Name,
		)
	}
}
// TestAllResourcesRenamed checks that, after applying development mode, every
// mocked resource exposing a string "Name" field has "dev" in that name.
func TestAllResourcesRenamed(t *testing.T) {
	b := mockBundle(config.Development)
	// Captured before the mutator runs, as in the original test.
	rv := reflect.ValueOf(b.Config.Resources)
	rt := rv.Type()

	mut := ProcessEnvironmentMode()
	require.NoError(t, mut.Apply(context.Background(), b))

	for idx := 0; idx < rv.NumField(); idx++ {
		f := rv.Field(idx)
		if f.Kind() != reflect.Map {
			continue
		}
		for _, key := range f.MapKeys() {
			name := f.MapIndex(key).Elem().FieldByName("Name")
			if !name.IsValid() || name.Kind() != reflect.String {
				continue
			}
			assert.True(
				t,
				strings.Contains(name.String(), "dev"),
				"process_environment_mode should rename '%s' in '%s'",
				key,
				rt.Field(idx).Name,
			)
		}
	}
}

View File

@ -37,7 +37,7 @@ type Workspace struct {
// CurrentUser holds the current user.
// This is set after configuration initialization.
CurrentUser *iam.User `json:"current_user,omitempty" bundle:"readonly"`
CurrentUser *User `json:"current_user,omitempty" bundle:"readonly"`
// Remote workspace base path for deployment state, for artifacts, as synchronization target.
// This defaults to "~/.bundle/${bundle.name}/${bundle.environment}" where "~" expands to
@ -57,6 +57,13 @@ type Workspace struct {
StatePath string `json:"state_path,omitempty"`
}
// User wraps the SDK's iam.User with additional, bundle-specific details
// about the current user.
type User struct {
	// A short name for the user, based on the user's UserName.
	ShortName string `json:"short_name,omitempty" bundle:"readonly"`

	// The embedded SDK user; its fields (e.g. UserName, Id) are promoted.
	*iam.User
}
func (w *Workspace) Client() (*databricks.WorkspaceClient, error) {
cfg := databricks.Config{
// Generic

View File

@ -15,9 +15,10 @@ func getSync(ctx context.Context, b *bundle.Bundle) (*sync.Sync, error) {
}
opts := sync.SyncOptions{
LocalPath: b.Config.Path,
RemotePath: b.Config.Workspace.FilesPath,
Full: false,
LocalPath: b.Config.Path,
RemotePath: b.Config.Workspace.FilesPath,
Full: false,
CurrentUser: b.Config.Workspace.CurrentUser.User,
SnapshotBasePath: cacheDir,
WorkspaceClient: b.WorkspaceClient(),