From 715a4dfb2153ed875ddee20fcd419df6089fa23b Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 15 Mar 2023 17:25:57 +0100 Subject: [PATCH] Path escape filepaths in the URL (#250) Before we were using url query escaping to escape the file path. This is wrong since the file path is a part of the URL path rather than URL query. These encoding schemes are similar but do not have identical encodings which was why we got these weird edge cases Fixed, and added nightly test for assert for this ``` 2023/03/15 16:07:50 [INFO] Action: PUT: .gitignore, a b/bar.py, c+d/uno.py, foo.py 2023/03/15 16:07:51 [INFO] Uploaded foo.py 2023/03/15 16:07:51 [INFO] Uploaded a b/bar.py 2023/03/15 16:07:51 [INFO] Uploaded .gitignore 2023/03/15 16:07:51 [INFO] Uploaded c+d/uno.py 2023/03/15 16:07:51 [INFO] Initial Sync Complete ``` ``` [VSCODE] bricks cli path: /Users/shreyas.goenka/.vscode/extensions/databricks.databricks-0.3.4-darwin-arm64/bin/bricks [VSCODE] sync command args: sync,.,/Repos/shreyas.goenka@databricks.com/sync-fail.ide,--watch,--output,json -------------------------------------------------------- Starting synchronization (4 files) Uploaded .gitignore Uploaded foo.py Uploaded c+d/uno.py Uploaded a b/bar.py Completed synchronization ``` --- internal/sync_test.go | 42 ++++++++++++++++++++++++++++++++ libs/sync/repofiles/repofiles.go | 2 +- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/internal/sync_test.go b/internal/sync_test.go index 43bf2a95..06ea4866 100644 --- a/internal/sync_test.go +++ b/internal/sync_test.go @@ -291,6 +291,48 @@ func TestAccNestedFolderSync(t *testing.T) { assertSync.snapshotContains(append(repoFiles, ".gitignore")) } +func TestAccNestedSpacePlusAndHashAreEscapedSync(t *testing.T) { + t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + + wsc := databricks.Must(databricks.NewWorkspaceClient()) + ctx := context.Background() + + localRepoPath, remoteRepoPath := setupRepo(t, wsc, ctx) + + // Run `bricks sync` in the background. + c := NewCobraTestRunner(t, "sync", localRepoPath, remoteRepoPath, "--watch") + c.RunBackground() + + assertSync := assertSync{ + t: t, + c: c, + w: wsc, + localRoot: localRepoPath, + remoteRoot: remoteRepoPath, + } + + // .gitignore is created by the sync process to enforce .databricks is not synced + assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore")) + + // New file + localFilePath := filepath.Join(localRepoPath, "dir1/a b+c/c+d e/e+f g#i.txt") + err := os.MkdirAll(filepath.Dir(localFilePath), 0o755) + assert.NoError(t, err) + f := testfile.CreateFile(t, localFilePath) + defer f.Close(t) + assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "dir1")) + assertSync.remoteDirContent(ctx, "dir1", []string{"a b+c"}) + assertSync.remoteDirContent(ctx, "dir1/a b+c", []string{"c+d e"}) + assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{"e+f g#i.txt"}) + assertSync.snapshotContains(append(repoFiles, ".gitignore", filepath.FromSlash("dir1/a b+c/c+d e/e+f g#i.txt"))) + + // delete + f.Remove(t) + // directories are not cleaned up right now. This is not ideal + assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{}) + assertSync.snapshotContains(append(repoFiles, ".gitignore")) +} + // sync does not clean up empty directories from the workspace file system. // This is a check for the edge case when a user does the following: // diff --git a/libs/sync/repofiles/repofiles.go b/libs/sync/repofiles/repofiles.go index fa2b917b..8fcabc11 100644 --- a/libs/sync/repofiles/repofiles.go +++ b/libs/sync/repofiles/repofiles.go @@ -62,7 +62,7 @@ func (r *RepoFiles) writeRemote(ctx context.Context, relativePath string, conten if err != nil { return err } - escapedPath := url.QueryEscape(strings.TrimLeft(remotePath, "/")) + escapedPath := url.PathEscape(strings.TrimLeft(remotePath, "/")) apiPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=true", escapedPath) err = apiClient.Do(ctx, http.MethodPost, apiPath, content, nil)