This commit is contained in:
Lennart Kats 2023-07-15 16:10:40 +02:00
parent d68d054160
commit 9c064a8ca4
12 changed files with 167 additions and 42 deletions

View File

@ -3,6 +3,7 @@ package mutator
import (
"context"
"strings"
"unicode"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
@ -27,8 +28,21 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error
}
b.Config.Workspace.CurrentUser = &config.User{
ShortName: strings.Split(me.UserName, "@")[0],
ShortName: getShortUserName(me.UserName),
User: me,
}
return nil
}
// Get a short-form username, based on the user's primary email address.
// We leave the full range of unicode letters in tact, but remove all "special" characters,
// including dots, which are not supported in e.g. experiment names.
func getShortUserName(emailAddress string) string {
r := []rune(strings.Split(emailAddress, "@")[0])
for i := 0; i < len(r); i++ {
if !unicode.IsLetter(r[i]) {
r[i] = '_'
}
}
return string(r)
}

View File

@ -1,3 +1,40 @@
package mutator
import "testing"
func TestPopulateCurrentUser(t *testing.T) {
// We need to implement workspace client mocking to implement this test.
}
func TestGetShortUserName(t *testing.T) {
tests := []struct {
name string
email string
expected string
}{
{
name: "test alphanumeric characters",
email: "test.user@example.com",
expected: "test_user",
},
{
name: "test unicode characters",
email: "tést.üser@example.com",
expected: "tést_üser",
},
{
name: "test special characters",
email: "test$.user@example.com",
expected: "test__user",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := getShortUserName(tt.email)
if result != tt.expected {
t.Errorf("getShortUserName(%q) = %q; expected %q", tt.email, result, tt.expected)
}
})
}
}

View File

@ -10,7 +10,6 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/databricks/databricks-sdk-go/service/ml"
)
@ -82,23 +81,37 @@ func transformDevelopmentMode(b *bundle.Bundle) error {
}
func validateDevelopmentMode(b *bundle.Bundle) error {
if isUserSpecificDeployment(b) {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
if path := findIncorrectPath(b, config.Development); path != "" {
return fmt.Errorf("%s must start with '~/' or contain the current username when using 'mode: development'", path)
}
return nil
}
func isUserSpecificDeployment(b *bundle.Bundle) bool {
func findIncorrectPath(b *bundle.Bundle, mode config.Mode) string {
username := b.Config.Workspace.CurrentUser.UserName
return !strings.Contains(b.Config.Workspace.StatePath, username) ||
!strings.Contains(b.Config.Workspace.ArtifactsPath, username) ||
!strings.Contains(b.Config.Workspace.FilesPath, username)
containsExpected := true
if mode == config.Production {
containsExpected = false
}
if strings.Contains(b.Config.Workspace.RootPath, username) != containsExpected && b.Config.Workspace.RootPath != "" {
return "root_path"
}
if strings.Contains(b.Config.Workspace.StatePath, username) != containsExpected {
return "state_path"
}
if strings.Contains(b.Config.Workspace.FilesPath, username) != containsExpected {
return "files_path"
}
if strings.Contains(b.Config.Workspace.ArtifactsPath, username) != containsExpected {
return "artifacts_path"
}
return ""
}
func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUsed bool) error {
if b.Config.Bundle.Git.Inferred {
TODO: show a nice human error here? :(
return fmt.Errorf("environment with 'mode: production' must specify an explicit 'git' configuration")
return fmt.Errorf("environment with 'mode: production' must specify an explicit 'environments.git' configuration")
}
r := b.Config.Resources
@ -109,12 +122,17 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs
}
if !isPrincipalUsed {
if isUserSpecificDeployment(b) {
return fmt.Errorf("environment with 'mode: development' must deploy to a location specific to the user, and should e.g. set 'root_path: ~/.bundle/${bundle.name}/${bundle.environment}'")
if path := findIncorrectPath(b, config.Production); path != "" {
message := "%s must not contain the current username when using 'mode: production' without a service principal"
if path == "root_path" {
return fmt.Errorf(message+"\n tip: set workspace.root_path to a shared path such as /Shared/.bundle/${bundle.name}/${bundle.environment}", path)
} else {
return fmt.Errorf(message, path)
}
}
if !arePermissionsSetExplicitly(r) {
return fmt.Errorf("environment with 'mode: production' must set permissions and run_as for all resources (when not using service principals)")
return fmt.Errorf("permissions and run_as must be set when using 'mode_production' without a service principals")
}
}
return nil
@ -155,7 +173,7 @@ func arePermissionsSetExplicitly(r config.Resources) bool {
return false
}
}
return false
return true
}
func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) error {
@ -167,7 +185,7 @@ func (m *processEnvironmentMode) Apply(ctx context.Context, b *bundle.Bundle) er
}
return transformDevelopmentMode(b)
case config.Production:
isPrincipal, err := m.isServicePrincipalUsed(ctx, b)
isPrincipal, err := isServicePrincipalUsed(ctx, b)
if err != nil {
return err
}

View File

@ -23,6 +23,10 @@ func mockBundle(mode config.Mode) *bundle.Bundle {
Config: config.Root{
Bundle: config.Bundle{
Mode: mode,
Git: config.Git{
OriginURL: "http://origin",
Branch: "main",
},
},
Workspace: config.Workspace{
CurrentUser: &config.User{
@ -83,31 +87,61 @@ func TestProcessEnvironmentModeDefault(t *testing.T) {
func TestProcessEnvironmentModeProduction(t *testing.T) {
bundle := mockBundle(config.Production)
err := validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "state_path")
bundle.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state"
bundle.Config.Workspace.ArtifactsPath = "/Shared/.bundle/x/y/artifacts"
bundle.Config.Workspace.FilesPath = "/Shared/.bundle/x/y/files"
err := validateProductionMode(context.Background(), bundle, false)
err = validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "permissions")
permissions := []resources.Permission{
{
Level: "CAN_MANAGE",
UserName: "user@company.com",
},
}
bundle.Config.Resources.Jobs["job1"].Permissions = permissions
bundle.Config.Resources.Jobs["job1"].RunAs = &jobs.JobRunAs{UserName: "user@company.com"}
bundle.Config.Resources.Pipelines["pipeline1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment1"].Permissions = permissions
bundle.Config.Resources.Experiments["experiment2"].Permissions = permissions
bundle.Config.Resources.Models["model1"].Permissions = permissions
err = validateProductionMode(context.Background(), bundle, false)
require.NoError(t, err)
assert.Equal(t, "job1", bundle.Config.Resources.Jobs["job1"].Name)
assert.Equal(t, "pipeline1", bundle.Config.Resources.Pipelines["pipeline1"].Name)
assert.False(t, bundle.Config.Resources.Pipelines["pipeline1"].PipelineSpec.Development)
}
func TestProcessEnvironmentModeProductionFails(t *testing.T) {
func TestProcessEnvironmentModeProductionGit(t *testing.T) {
bundle := mockBundle(config.Production)
err := validateProductionMode(context.Background(), bundle, false)
// Pretend the user didn't set Git configuration explicitly
bundle.Config.Bundle.Git.Inferred = true
err := validateProductionMode(context.Background(), bundle, false)
require.ErrorContains(t, err, "git")
bundle.Config.Bundle.Git.Inferred = false
// TODO: fail if on wrong branch
require.Error(t, err)
}
func TestProcessEnvironmentModeProductionOkForPrincipal(t *testing.T) {
bundle := mockBundle(config.Production)
// Our environment has all kinds of problems when not using service principals ...
err := validateProductionMode(context.Background(), bundle, false)
require.Error(t, err)
// ... but we're much less strict when a principal is used
err = validateProductionMode(context.Background(), bundle, true)
require.NoError(t, err)
}

View File

@ -200,6 +200,9 @@ func (r *Root) MergeEnvironment(env *Environment) error {
}
if env.Git.Branch != "" {
if r.Bundle.Git.Inferred && r.Bundle.Git.Branch != env.Git.Branch {
return fmt.Errorf("not on the right Git branch:\n expected according to configuration: %s\n actual: %s", env.Git.Branch, r.Bundle.Git.Branch)
}
r.Bundle.Git.Branch = env.Git.Branch
r.Bundle.Git.Inferred = false
}

View File

@ -1,4 +1,10 @@
bundle:
name: autoload git config test
environments:
development:
default: true
production:
git:
branch: foo
branch: production-branch

View File

@ -6,10 +6,13 @@ import (
"github.com/stretchr/testify/assert"
)
func TestGitConfig(t *testing.T) {
func TestAutoLoad(t *testing.T) {
b := load(t, "./autoload_git")
assert.Equal(t, "foo", b.Config.Bundle.Git.Branch)
sshUrl := "git@github.com:databricks/cli.git"
httpsUrl := "https://github.com/databricks/cli"
assert.Contains(t, []string{sshUrl, httpsUrl}, b.Config.Bundle.Git.OriginURL)
assert.NotEqual(t, "", b.Config.Bundle.Git.Branch)
assert.Contains(t, b.Config.Bundle.Git.OriginURL, "/cli")
}
func TestWrongBranch(t *testing.T) {
err := loadEnvironmentWithError(t, "./autoload_git", "production")
assert.ErrorContains(t, err, "not on the right Git branch")
}

View File

@ -23,6 +23,7 @@ environments:
development: false
production:
mode: production
resources:
pipelines:
nyc_taxi_pipeline:

View File

@ -23,3 +23,10 @@ func loadEnvironment(t *testing.T, path, env string) *bundle.Bundle {
require.NoError(t, err)
return b
}
func loadEnvironmentWithError(t *testing.T, path, env string) error {
b := load(t, path)
err := bundle.Apply(context.Background(), b, mutator.SelectEnvironment(env))
require.Error(t, err)
return err
}

View File

@ -499,8 +499,6 @@ func TestAccSyncIncrementalSyncPythonNotebookDelete(t *testing.T) {
}
func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background()
@ -509,29 +507,27 @@ func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
// Hypothetical repo path doesn't exist.
nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
// Paths nested under a hypothetical repo path should yield the same error.
nestedPath := path.Join(nonExistingRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.ErrorContains(t, err, " does not exist; please create it first")
}
func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
wsc := databricks.Must(databricks.NewWorkspaceClient())
ctx := context.Background()
_, remoteRepoPath := setupRepo(t, wsc, ctx)
// Repo itself is usable.
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath)
err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
assert.NoError(t, err)
// Path nested under repo path is usable.
nestedPath := path.Join(remoteRepoPath, "nested/directory")
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
assert.NoError(t, err)
// Verify that the directory has been created.
@ -549,7 +545,7 @@ func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
require.NoError(t, err)
remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-"))
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath)
err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
assert.NoError(t, err)
// Clean up directory after test.

View File

@ -24,11 +24,14 @@ func repoPathForPath(me *iam.User, remotePath string) string {
// EnsureRemotePathIsUsable checks if the specified path is nested under
// expected base paths and if it is a directory or repository.
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error {
me, err := wsc.CurrentUser.Me(ctx)
func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string, me *iam.User) error {
var err error
if me == nil {
me, err = wsc.CurrentUser.Me(ctx)
if err != nil {
return err
}
}
// Ensure that the remote path exists.
// If it is a repo, it has to exist.

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/iam"
)
type SyncOptions struct {
@ -23,6 +24,8 @@ type SyncOptions struct {
WorkspaceClient *databricks.WorkspaceClient
CurrentUser *iam.User
Host string
}
@ -50,7 +53,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) {
}
// Verify that the remote path we're about to synchronize to is valid and allowed.
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath)
err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath, opts.CurrentUser)
if err != nil {
return nil, err
}