This commit is contained in:
Lennart Kats 2023-07-15 16:10:40 +02:00
parent d68d054160
commit 9c064a8ca4
12 changed files with 167 additions and 42 deletions

View File

@ -3,6 +3,7 @@ package mutator
import ( import (
"context" "context"
"strings" "strings"
"unicode"
"github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config"
@ -27,8 +28,21 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error
} }
b.Config.Workspace.CurrentUser = &config.User{ b.Config.Workspace.CurrentUser = &config.User{
ShortName: strings.Split(me.UserName, "@")[0], ShortName: getShortUserName(me.UserName),
User: me, User: me,
} }
return nil return nil
} }
// Get a short-form username, based on the user's primary email address.
// We leave the full range of unicode letters in tact, but remove all "special" characters,
// including dots, which are not supported in e.g. experiment names.
func getShortUserName(emailAddress string) string {
r := []rune(strings.Split(emailAddress, "@")[0])
for i := 0; i < len(r); i++ {
if !unicode.IsLetter(r[i]) {
r[i] = '_'
}
}
return string(r)
}

View File

@ -1,3 +1,40 @@
package mutator package mutator
// We need to implement workspace client mocking to implement this test. import "testing"
func TestPopulateCurrentUser(t *testing.T) {
// We need to implement workspace client mocking to implement this test.
}
func TestGetShortUserName(t *testing.T) {
tests := []struct {
name string
email string
expected string
}{
{
name: "test alphanumeric characters",
email: "test.user@example.com",
expected: "test_user",
},
{
name: "test unicode characters",
email: "tést.üser@example.com",
expected: "tést_üser",
},
{
name: "test special characters",
email: "test$.user@example.com",
expected: "test__user",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := getShortUserName(tt.email)
if result != tt.expected {
t.Errorf("getShortUserName(%q) = %q; expected %q", tt.email, result, tt.expected)
}
})
}
}

View File

@ -10,12 +10,11 @@ import (
"github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml" "github.com/databricks/databricks-sdk-go/service/ml"
) )
type processEnvironmentMode struct {} type processEnvironmentMode struct{}
const developmentConcurrentRuns = 4 const developmentConcurrentRuns = 4
@ -82,23 +81,37 @@ func transformDevelopmentMode(b *bundle.Bundle) error {
} }
func validateDevelopmentMode(b *bundle.Bundle) error { func validateDevelopmentMode(b *bundle.Bundle) error {
if isUserSpecificDeployment(b) { if path := findIncorrectPath(b, config.Development); path != "" {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'") return fmt.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path)
} }
return nil return nil
} }
func isUserSpecificDeployment(b *bundle.Bundle) bool { func findIncorrectPath(b *bundle.Bundle, mode config.Mode) string {
username := b.Config.Workspace.CurrentUser.UserName username := b.Config.Workspace.CurrentUser.UserName
return !strings.Contains(b.Config.Workspace.StatePath, username) || containsExpected := true
!strings.Contains(b.Config.Workspace.ArtifactsPath, username) || if mode == config.Production {
!strings.Contains(b.Config.Workspace.FilesPath, username) containsExpected = false
}
if strings.Contains(b.Config.Workspace.RootPath, username) != containsExpected && b.Config.Workspace.RootPath != "" {
return "root_path"
}
if strings.Contains(b.Config.Workspace.StatePath, username) != containsExpected {
return "state_path"
}
if strings.Contains(b.Config.Workspace.FilesPath, username) != containsExpected {
return "files_path"
}
if strings.Contains(b.Config.Workspace.ArtifactsPath, username) != containsExpected {
return "artifacts_path"
}
return ""
} }
func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) error { func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) error {
if b.Config.Bundle.Git.Inferred { if b.Config.Bundle.Git.Inferred {
TODO: show a nice human error here? :( return fmt.Errorf("environment with 'mode: production' must specify an explicit 'environments.git' configuration")
return fmt.Errorf("environment with 'mode: production' must specify an explicit 'git' configuration")
} }
r := b.Config.Resources r := b.Config.Resources
@ -109,12 +122,17 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs
} }
if !isPrincipalUsed { if !isPrincipalUsed {
if isUserSpecificDeployment(b) { if path := findIncorrectPath(b, config.Production); path != "" {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'") message := "%s must not contain the current username when using 'mode: production' without a service principal"
if path == "root_path" {
return fmt.Errorf(message+"\n tip: set workspace.root_path to a shared path such as /Shared/.bundle/${bundle.name}/${bundle.environment}", path)
} else {
return fmt.Errorf(message, path)
}
} }
if !arePermissionsSetExplicitly(r) { if !arePermissionsSetExplicitly(r) {
return fmt.Errorf("environment with 'mode: production' must set permissions and run_as for all resources (when not using service principals)") return fmt.Errorf("permissions and run_as must be set when using 'mode_production' without a service principals")
} }
} }
return nil return nil
@ -155,7 +173,7 @@ func arePermissionsSetExplicitly(r config.Resources) bool {
return false return false
} }
} }
return false return true
} }
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error { func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
@ -167,7 +185,7 @@ func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) er
} }
return transformDevelopmentMode(b) return transformDevelopmentMode(b)
case config.Production: case config.Production:
isPrincipal, err := m.isServicePrincipalUsed(ctx, b) isPrincipal, err := isServicePrincipalUsed(ctx, b)
if err != nil { if err != nil {
return err return err
} }

View File

@ -23,6 +23,10 @@ func mockBundle(mode config.Mode) *bundle.Bundle {
Config: config.Root{ Config: config.Root{
Bundle: config.Bundle{ Bundle: config.Bundle{
Mode: mode, Mode: mode,
Git: config.Git{
OriginURL: "http://origin",
Branch: "main",
},
}, },
Workspace: config.Workspace{ Workspace: config.Workspace{
CurrentUser: &config.User{ CurrentUser: &config.User{
@ -83,31 +87,61 @@ func TestProcessEnvironmentModeDefault(t *testing.T) {
func TestProcessEnvironmentModeProduction(t *testing.T) { func TestProcessEnvironmentModeProduction(t *testing.T) {
bundle := mockBundle(config.Production) bundle := mockBundle(config.Production)
err := validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "state_path")
bundle.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state" bundle.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state"
bundle.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts" bundle.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts"
bundle.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files" bundle.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files"
err := validateProductionMode(context.Background(), bundle, false) err = validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "permissions")
permissions := []resources.Permission{
{
Level: "CAN_MANAGE",
UserName: "user@company.com",
},
}
bundle.Config.Resources.Jobs["job1"].Permissions = permissions
bundle.Config.Resources.Jobs["job1"].RunAs = &jobs.JobRunAs{UserName: "user@company.com"}
bundle.Config.Resources.Pipelines["pipeline1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment2"].Permissions = permissions
bundle.Config.Resources.Models["model1"].Permissions = permissions
err = validateProductionMode(context.Background(), bundle, false)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name) assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name) assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development) assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
} }
func TestProcessEnvironmentModeProductionFails(t *testing.T) { func TestProcessEnvironmentModeProductionGit(t *testing.T) {
bundle := mockBundle(config.Production) bundle := mockBundle(config.Production)
err := validateProductionMode(context.Background(), bundle, false) // Pretend the user didn't set Git configuration explicitly
bundle.Config.Bundle.Git.Inferred = true
err := validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "git")
bundle.Config.Bundle.Git.Inferred = false
// TODO: fail if on wrong branch
require.Error(t, err)
} }
func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) { func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) {
bundle := mockBundle(config.Production) bundle := mockBundle(config.Production)
// Our environment has all kinds of problems when not using service principals ...
err := validateProductionMode(context.Background(), bundle, false) err := validateProductionMode(context.Background(), bundle, false)
require.Error(t, err)
// ... but we're much less strict when a principal is used
err = validateProductionMode(context.Background(), bundle, true)
require.NoError(t, err) require.NoError(t, err)
} }

View File

@ -200,6 +200,9 @@ func (r *Root) MergeEnvironment(env *Environment) error {
} }
if env.Git.Branch != "" { if env.Git.Branch != "" {
if r.Bundle.Git.Inferred && r.Bundle.Git.Branch != env.Git.Branch {
return fmt.Errorf("not on the right Git branch:\n expected according to configuration: %s\n actual: %s", env.Git.Branch, r.Bundle.Git.Branch)
}
r.Bundle.Git.Branch = env.Git.Branch r.Bundle.Git.Branch = env.Git.Branch
r.Bundle.Git.Inferred = false r.Bundle.Git.Inferred = false
} }

View File

@ -1,4 +1,10 @@
bundle: bundle:
name: autoload git config test name: autoload git config test
git:
branch: foo environments:
development:
default: true
production:
git:
branch: production-branch

View File

@ -6,10 +6,13 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestGitConfig(t *testing.T) { func TestAutoLoad(t *testing.T) {
b := load(t, "./autoload_git") b := load(t, "./autoload_git")
assert.Equal(t, "foo", b.Config.Bundle.Git.Branch) assert.NotEqual(t, "", b.Config.Bundle.Git.Branch)
sshUrl := "git@github.com:databricks/cli.git" assert.Contains(t, b.Config.Bundle.Git.OriginURL, "/cli")
httpsUrl := "https://github.com/databricks/cli" }
assert.Contains(t, []string{sshUrl, httpsUrl}, b.Config.Bundle.Git.OriginURL)
func TestWrongBranch(t *testing.T) {
err := loadEnvironmentWithError(t, "./autoload_git", "production")
assert.ErrorContains(t, err, "not on the right Git branch")
} }

View File

@ -23,6 +23,7 @@ environments:
development: false development: false
production: production:
mode: production
resources: resources:
pipelines: pipelines:
nyc_taxi_pipeline: nyc_taxi_pipeline:

View File

@ -23,3 +23,10 @@ func loadEnvironment(t *testing.T, path, env string) *bundle.Bundle {
require.NoError(t, err) require.NoError(t, err)
return b return b
} }
func loadEnvironmentWithError(t *testing.T, path, env string) error {
b := load(t, path)
err := bundle.Apply(context.Background(), b, mutator.SelectEnvironment(env))
require.Error(t, err)
return err
}

View File

@ -499,8 +499,6 @@ func TestAccSyncIncrementalSyncPythonNotebookDelete(t *testing.T) {
} }
func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) { func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient()) wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background() ctx := context.Background()
@ -509,29 +507,27 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
// Hypothetical repo path doesn't exist. // Hypothetical repo path doesn't exist.
nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-")) nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath) err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first") assert.ErrorContains(t, err, " does not exist; please create it first")
// Paths nested under a hypothetical repo path should yield the same error. // Paths nested under a hypothetical repo path should yield the same error.
nestedPath := path.Join(nonExistingRepoPath, "nested/directory") nestedPath := path.Join(nonExistingRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath) err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first") assert.ErrorContains(t, err, " does not exist; please create it first")
} }
func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) { func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient()) wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background() ctx := context.Background()
_, remoteRepoPath := setupRepo(t, wsc, ctx) _, remoteRepoPath := setupRepo(t, wsc, ctx)
// Repo itself is usable. // Repo itself is usable.
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath) err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
assert.NoError(t, err) assert.NoError(t, err)
// Path nested under repo path is usable. // Path nested under repo path is usable.
nestedPath := path.Join(remoteRepoPath, "nested/directory") nestedPath := path.Join(remoteRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath) err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.NoError(t, err) assert.NoError(t, err)
// Verify that the directory has been created. // Verify that the directory has been created.
@ -549,7 +545,7 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-")) remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath) err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
assert.NoError(t, err) assert.NoError(t, err)
// Clean up directory after test. // Clean up directory after test.

View File

@ -24,10 +24,13 @@ func repoPathForPath(me *iam.User, remotePath string) string {
// EnsureRemotePathIsUsable checks if the specified path is nested under // EnsureRemotePathIsUsable checks if the specified path is nested under
// expected base paths and if it is a directory or repository. // expected base paths and if it is a directory or repository.
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error { func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) error {
me, err := wsc.CurrentUser.Me(ctx) var err error
if err != nil { if me == nil {
return err me, err = wsc.CurrentUser.Me(ctx)
if err != nil {
return err
}
} }
// Ensure that the remote path exists. // Ensure that the remote path exists.

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/iam"
) )
type SyncOptions struct { type SyncOptions struct {
@ -23,6 +24,8 @@ type SyncOptions struct {
WorkspaceClient *databricks.WorkspaceClient WorkspaceClient *databricks.WorkspaceClient
CurrentUser *iam.User
Host string Host string
} }
@ -50,7 +53,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
} }
// Verify that the remote path we're about to synchronize to is valid and allowed. // Verify that the remote path we're about to synchronize to is valid and allowed.
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath) err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
if err != nil { if err != nil {
return nil, err return nil, err
} }