From 584c8d1b0ba22eb5ffb3060c4e12b2b6d8de4b05 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 20 Feb 2023 14:34:48 +0100 Subject: [PATCH] Allow synchronization to a directory inside a repo (#213) Before this commit this would error saying that the repo doesn't exist yet. With this commit it creates the directory, but only after checking that the repo exists. --- internal/sync_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++ libs/sync/path.go | 47 ++++++++++++++++++----------- libs/sync/path_test.go | 16 ++++++++++ libs/sync/sync.go | 2 +- 4 files changed, 114 insertions(+), 19 deletions(-) diff --git a/internal/sync_test.go b/internal/sync_test.go index b2f8b317..8081f16c 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -461,3 +461,71 @@ func TestAccIncrementalSyncPythonNotebookDelete(t *testing.T) { f.Remove(t) assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) } + +func TestAccSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + me, err := wsc.CurrentUser.Me(ctx) + require.NoError(t, err) + + // Hypothetical repo path doesn't exist. + nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, RandomName("doesnt-exist-")) + err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath) + assert.ErrorContains(t, err, " does not exist; please create it first") + + // Paths nested under a hypothetical repo path should yield the same error. + nestedPath := path.Join(nonExistingRepoPath, "nested/directory") + err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath) + assert.ErrorContains(t, err, " does not exist; please create it first") +} + +func TestAccSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + _, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Repo itself is usable. + err := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath) + assert.NoError(t, err) + + // Path nested under repo path is usable. + nestedPath := path.Join(remoteRepoPath, "nested/directory") + err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath) + assert.NoError(t, err) + + // Verify that the directory has been created. + info, err := wsc.Workspace.GetStatusByPath(ctx, nestedPath) + require.NoError(t, err) + require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType) +} + +func TestAccSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + me, err := wsc.CurrentUser.Me(ctx) + require.NoError(t, err) + + remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("ensure-path-exists-test-")) + err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath) + assert.NoError(t, err) + + // Clean up directory after test. + defer func() { + err := wsc.Workspace.Delete(ctx, workspace.Delete{ + Path: remotePath, + }) + assert.NoError(t, err) + }() + + // Verify that the directory has been created. + info, err := wsc.Workspace.GetStatusByPath(ctx, remotePath) + require.NoError(t, err) + require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType) +} diff --git a/libs/sync/path.go b/libs/sync/path.go index 6a87316f..633205da 100644 --- a/libs/sync/path.go +++ b/libs/sync/path.go @@ -49,15 +49,24 @@ func checkPathNestedUnderBasePaths(me *scim.User, p string) error { return nil } -// ensureRemotePathIsUsable checks if the specified path is nested under +func repoPathForPath(me *scim.User, remotePath string) string { + base := path.Clean(fmt.Sprintf("/Repos/%s", me.UserName)) + remotePath = path.Clean(remotePath) + for strings.HasPrefix(path.Dir(remotePath), base) && path.Dir(remotePath) != base { + remotePath = path.Dir(remotePath) + } + return remotePath +} + +// EnsureRemotePathIsUsable checks if the specified path is nested under // expected base paths and if it is a directory or repository. -func ensureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, path string) error { +func EnsureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClient, remotePath string) error { me, err := wsc.CurrentUser.Me(ctx) if err != nil { return err } - err = checkPathNestedUnderBasePaths(me, path) + err = checkPathNestedUnderBasePaths(me, remotePath) if err != nil { return err } @@ -65,27 +74,29 @@ func ensureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie // Ensure that the remote path exists. // If it is a repo, it has to exist. // If it is a workspace path, it may not exist. - info, err := wsc.Workspace.GetStatusByPath(ctx, path) + info, err := wsc.Workspace.GetStatusByPath(ctx, remotePath) if err != nil { // We only deal with 404s below. if !apierr.IsMissing(err) { return err } - switch { - case strings.HasPrefix(path, "/Repos/"): - return fmt.Errorf("%s does not exist; please create it first", path) - case strings.HasPrefix(path, "/Users/"): - // The workspace path doesn't exist. Create it and try again. - err = wsc.Workspace.MkdirsByPath(ctx, path) - if err != nil { - return fmt.Errorf("unable to create directory at %s: %w", path, err) + // If the path is nested under a repo, the repo has to exist. + if strings.HasPrefix(remotePath, "/Repos/") { + repoPath := repoPathForPath(me, remotePath) + _, err = wsc.Workspace.GetStatusByPath(ctx, repoPath) + if err != nil && apierr.IsMissing(err) { + return fmt.Errorf("%s does not exist; please create it first", repoPath) } - info, err = wsc.Workspace.GetStatusByPath(ctx, path) - if err != nil { - return err - } - default: + } + + // The workspace path doesn't exist. Create it and try again. + err = wsc.Workspace.MkdirsByPath(ctx, remotePath) + if err != nil { + return fmt.Errorf("unable to create directory at %s: %w", remotePath, err) + } + info, err = wsc.Workspace.GetStatusByPath(ctx, remotePath) + if err != nil { return err } } @@ -105,5 +116,5 @@ func ensureRemotePathIsUsable(ctx context.Context, wsc *databricks.WorkspaceClie return nil } - return fmt.Errorf("%s points to a %s", path, strings.ToLower(info.ObjectType.String())) + return fmt.Errorf("%s points to a %s", remotePath, strings.ToLower(info.ObjectType.String())) } diff --git a/libs/sync/path_test.go b/libs/sync/path_test.go index 0671b634..b6421be7 100644 --- a/libs/sync/path_test.go +++ b/libs/sync/path_test.go @@ -37,3 +37,19 @@ func TestPathNestedUnderBasePaths(t *testing.T) { assert.NoError(t, checkPathNestedUnderBasePaths(&me, "/Users/jane@doe.com/./foo")) assert.NoError(t, checkPathNestedUnderBasePaths(&me, "/Users/jane@doe.com/foo/bar/qux")) } + +func TestPathToRepoPath(t *testing.T) { + me := scim.User{ + UserName: "jane@doe.com", + } + + assert.Equal(t, "/Repos/jane@doe.com/foo", repoPathForPath(&me, "/Repos/jane@doe.com/foo/bar/qux")) + assert.Equal(t, "/Repos/jane@doe.com/foo", repoPathForPath(&me, "/Repos/jane@doe.com/foo/bar")) + assert.Equal(t, "/Repos/jane@doe.com/foo", repoPathForPath(&me, "/Repos/jane@doe.com/foo")) + + // We expect this function to be called with a path nested under the user's repo path. + // If this is not the case it should return the input verbatim (albeit cleaned). + assert.Equal(t, "/Repos/jane@doe.com", repoPathForPath(&me, "/Repos/jane@doe.com")) + assert.Equal(t, "/Repos/hello@world.com/foo/bar/qux", repoPathForPath(&me, "/Repos/hello@world.com/foo/bar/qux")) + assert.Equal(t, "/Repos/hello@world.com/foo/bar/qux", repoPathForPath(&me, "/Repos/hello@world.com/foo/bar/qux/.")) +} diff --git a/libs/sync/sync.go b/libs/sync/sync.go index fe326f12..a7aef87c 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -47,7 +47,7 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { } // Verify that the remote path we're about to synchronize to is valid and allowed. - err = ensureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath) + err = EnsureRemotePathIsUsable(ctx, opts.WorkspaceClient, opts.RemotePath) if err != nil { return nil, err }