databricks-cli/integration/cmd/sync/sync_test.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

570 lines
18 KiB
Go
Raw Normal View History

package sync_test
import (
"context"
"encoding/json"
"fmt"
"io"
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
"io/fs"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"testing"
"time"
"github.com/databricks/cli/internal/acc"
"github.com/databricks/cli/internal/testcli"
"github.com/databricks/cli/internal/testutil"
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
"github.com/databricks/cli/libs/filer"
"github.com/databricks/cli/libs/sync"
"github.com/databricks/cli/libs/testfile"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/client"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// repoUrl is the public Git repository that setupRepo registers as a
// Databricks repo and clones locally.
var repoUrl = "https://github.com/databricks/databricks-empty-ide-project.git"

// repoFiles is deliberately a non-nil, empty slice.
// NOTE(review): presumably the files expected in a fresh checkout of repoUrl
// (none, as the project is empty) — confirm against the tests that use it.
var repoFiles = []string{}
// This test needs auth env vars to run.
// Please run using the deco env test or deco env shell
func setupRepo(t *testing.T, wsc *databricks.WorkspaceClient, ctx context.Context) (localRoot, remoteRoot string) {
me, err := wsc.CurrentUser.Me(ctx)
require.NoError(t, err)
repoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, testutil.RandomName("empty-repo-sync-integration-"))
Bump github.com/databricks/databricks-sdk-go from 0.47.0 to 0.48.0 (#1810) Bumps [github.com/databricks/databricks-sdk-go](https://github.com/databricks/databricks-sdk-go) from 0.47.0 to 0.48.0. <details> <summary>Release notes</summary> <p><em>Sourced from <a href="https://github.com/databricks/databricks-sdk-go/releases">github.com/databricks/databricks-sdk-go's releases</a>.</em></p> <blockquote> <h2>v0.48.0</h2> <h3>Internal Changes</h3> <ul> <li>Update SDK to latest OpenAPI spec (<a href="https://redirect.github.com/databricks/databricks-sdk-go/pull/1057">#1057</a>).</li> </ul> <p>Note: This release contains breaking changes, please see the API changes below for more details.</p> <h3>API Changes:</h3> <ul> <li>Added <code>DefaultSourceCodePath</code> and <code>Resources</code> fields for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#App">apps.App</a>.</li> <li>Added <code>Resources</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#CreateAppRequest">apps.CreateAppRequest</a>.</li> <li>Added <code>Resources</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#UpdateAppRequest">apps.UpdateAppRequest</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#CreatePipeline">pipelines.CreatePipeline</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#EditPipeline">pipelines.EditPipeline</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#PipelineSpec">pipelines.PipelineSpec</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . 
New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateCredentialsRequest">workspace.CreateCredentialsRequest</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#DeleteCredentialsRequest">workspace.DeleteCredentialsRequest</a>.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <code>any</code>.</li> <li>Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . 
New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsRequest">workspace.GetCredentialsRequest</a>.</li> <li>Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsResponse">workspace.GetCredentialsResponse</a>.</li> <li>[Breaking] Changed <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ListCredentialsResponse">workspace.ListCredentialsResponse</a>.</li> <li>Changed <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Update</code> method for <a 
href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <code>any</code>.</li> <li>[Breaking] Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateCredentialsRequest">workspace.UpdateCredentialsRequest</a>.</li> <li>Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service . 
New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateRepoRequest">workspace.CreateRepoRequest</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateRepoResponse">workspace.CreateRepoResponse</a>.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <code>any</code>.</li> <li>Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetRepoResponse">workspace.GetRepoResponse</a>.</li> <li>Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <code>any</code>.</li> <li>[Breaking] Changed 
<code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateRepoRequest">workspace.UpdateRepoRequest</a>.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>CredentialId</code> and <code>GitProvider</code> fields for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateCredentialsResponse">workspace.CreateCredentialsResponse</a> to be required.</li> <li>Changed <code>CredentialId</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CredentialInfo">workspace.CredentialInfo</a> to be required.</li> <li>Changed <code>CredentialId</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsResponse">workspace.GetCredentialsResponse</a> to be required.</li> <li>Changed <code>Patterns</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#SparseCheckout">workspace.SparseCheckout</a> to type <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#List">workspace.List</a>.</li> <li>Changed <code>Patterns</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#SparseCheckoutUpdate">workspace.SparseCheckoutUpdate</a> to type <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#List">workspace.List</a>.</li> <li>[Breaking] Changed 
<code>GitProvider</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateCredentialsRequest">workspace.UpdateCredentialsRequest</a> to be required.</li> </ul> <p>OpenAPI SHA: 0c86ea6dbd9a730c24ff0d4e509603e476955ac5, Date: 2024-10-02</p> </blockquote> </details> <details> <summary>Changelog</summary> <p><em>Sourced from <a href="https://github.com/databricks/databricks-sdk-go/blob/main/CHANGELOG.md">github.com/databricks/databricks-sdk-go's changelog</a>.</em></p> <blockquote> <h2>[Release] Release v0.48.0</h2> <h3>Internal Changes</h3> <ul> <li>Update SDK to latest OpenAPI spec (<a href="https://redirect.github.com/databricks/databricks-sdk-go/pull/1057">#1057</a>).</li> </ul> <p>Note: This release contains breaking changes, please see the API changes below for more details.</p> <h3>API Changes:</h3> <ul> <li>Added <code>DefaultSourceCodePath</code> and <code>Resources</code> fields for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#App">apps.App</a>.</li> <li>Added <code>Resources</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#CreateAppRequest">apps.CreateAppRequest</a>.</li> <li>Added <code>Resources</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/apps#UpdateAppRequest">apps.UpdateAppRequest</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#CreatePipeline">pipelines.CreatePipeline</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#EditPipeline">pipelines.EditPipeline</a>.</li> <li>Added <code>Schema</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/pipelines#PipelineSpec">pipelines.PipelineSpec</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a 
href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateCredentialsRequest">workspace.CreateCredentialsRequest</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#DeleteCredentialsRequest">workspace.DeleteCredentialsRequest</a>.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <code>any</code>.</li> <li>Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . 
New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsRequest">workspace.GetCredentialsRequest</a>.</li> <li>Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsResponse">workspace.GetCredentialsResponse</a>.</li> <li>[Breaking] Changed <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ListCredentialsResponse">workspace.ListCredentialsResponse</a>.</li> <li>Changed <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>List</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to type <code>Update</code> method for <a 
href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service.</li> <li>[Breaking] Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service to return <code>any</code>.</li> <li>[Breaking] Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GitCredentialsAPI">w.GitCredentials</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateCredentialsRequest">workspace.UpdateCredentialsRequest</a>.</li> <li>Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service . 
New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateRepoRequest">workspace.CreateRepoRequest</a>.</li> <li>[Breaking] Changed <code>Create</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateRepoResponse">workspace.CreateRepoResponse</a>.</li> <li>[Breaking] Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <code>any</code>.</li> <li>Changed <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Delete</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetRepoResponse">workspace.GetRepoResponse</a>.</li> <li>Changed <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Get</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to return <code>any</code>.</li> <li>[Breaking] Changed 
<code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service . New request type is <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateRepoRequest">workspace.UpdateRepoRequest</a>.</li> <li>Changed <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service to type <code>Update</code> method for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#ReposAPI">w.Repos</a> workspace-level service.</li> <li>[Breaking] Changed <code>CredentialId</code> and <code>GitProvider</code> fields for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CreateCredentialsResponse">workspace.CreateCredentialsResponse</a> to be required.</li> <li>Changed <code>CredentialId</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#CredentialInfo">workspace.CredentialInfo</a> to be required.</li> <li>Changed <code>CredentialId</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#GetCredentialsResponse">workspace.GetCredentialsResponse</a> to be required.</li> <li>Changed <code>Patterns</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#SparseCheckout">workspace.SparseCheckout</a> to type <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#List">workspace.List</a>.</li> <li>Changed <code>Patterns</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#SparseCheckoutUpdate">workspace.SparseCheckoutUpdate</a> to type <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#List">workspace.List</a>.</li> <li>[Breaking] Changed 
<code>GitProvider</code> field for <a href="https://pkg.go.dev/github.com/databricks/databricks-sdk-go/service/workspace#UpdateCredentialsRequest">workspace.UpdateCredentialsRequest</a> to be required.</li> </ul> <p>OpenAPI SHA: 0c86ea6dbd9a730c24ff0d4e509603e476955ac5, Date: 2024-10-02</p> </blockquote> </details> <details> <summary>Commits</summary> <ul> <li><a href="https://github.com/databricks/databricks-sdk-go/commit/80c963696b3c68e0b8aa4dc7dd55e16862a777c5"><code>80c9636</code></a> [Release] Release v0.48.0 (<a href="https://redirect.github.com/databricks/databricks-sdk-go/issues/1058">#1058</a>)</li> <li><a href="https://github.com/databricks/databricks-sdk-go/commit/6cecc224cbf7ceb85ca160d22fd27073899c2c40"><code>6cecc22</code></a> [Internal] Update SDK to latest OpenAPI spec (<a href="https://redirect.github.com/databricks/databricks-sdk-go/issues/1057">#1057</a>)</li> <li>See full diff in <a href="https://github.com/databricks/databricks-sdk-go/compare/v0.47.0...v0.48.0">compare view</a></li> </ul> </details> <br /> <details> <summary>Most Recent Ignore Conditions Applied to This Pull Request</summary> | Dependency Name | Ignore Conditions | | --- | --- | | github.com/databricks/databricks-sdk-go | [>= 0.28.a, < 0.29] | </details> [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/databricks/databricks-sdk-go&package-manager=go_modules&previous-version=0.47.0&new-version=0.48.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. 
[//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) --- <details> <summary>Dependabot commands and options</summary> <br /> You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) </details> --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester <andrew.nester@databricks.com>
2024-10-07 13:21:05 +00:00
repoInfo, err := wsc.Repos.Create(ctx, workspace.CreateRepoRequest{
Path: repoPath,
Url: repoUrl,
Provider: "gitHub",
})
require.NoError(t, err)
t.Cleanup(func() {
2022-09-27 16:58:55 +00:00
err := wsc.Repos.DeleteByRepoId(ctx, repoInfo.Id)
assert.NoError(t, err)
})
tempDir := t.TempDir()
localRoot = filepath.Join(tempDir, "empty-repo")
remoteRoot = repoPath
// clone public empty remote repo
cmd := exec.Command("git", "clone", repoUrl, localRoot)
err = cmd.Run()
require.NoError(t, err)
return localRoot, remoteRoot
}
type syncTest struct {
t *testing.T
c *testcli.Runner
w *databricks.WorkspaceClient
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
f filer.Filer
localRoot string
remoteRoot string
}
func setupSyncTest(t *testing.T, args ...string) *syncTest {
_, wt := acc.WorkspaceTest(t)
w := wt.W
localRoot := t.TempDir()
remoteRoot := acc.TemporaryWorkspaceDir(wt, "sync-")
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
f, err := filer.NewWorkspaceFilesClient(w, remoteRoot)
require.NoError(t, err)
// Prepend common arguments.
args = append([]string{
"sync",
localRoot,
remoteRoot,
"--output",
"json",
}, args...)
c := testcli.NewRunner(t, args...)
c.RunBackground()
return &syncTest{
t: t,
c: c,
w: w,
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
f: f,
localRoot: localRoot,
remoteRoot: remoteRoot,
}
}
// waitForCompletionMarker consumes the JSON events the sync command writes to
// stdout until one of type sync.EventTypeComplete is seen.
//
// The test fails if no completion event arrives within 30 seconds, if a line
// cannot be decoded as JSON, or if the stdout channel is closed first.
func (a *syncTest) waitForCompletionMarker() {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	for {
		select {
		case <-ctx.Done():
			a.t.Fatal("timed out waiting for sync to complete")
		case line, ok := <-a.c.StdoutLines:
			// A closed channel yields empty strings forever; report that
			// directly instead of as a JSON decoding failure.
			if !ok {
				a.t.Fatal("sync output was closed before the completion marker was emitted")
			}
			var event sync.EventBase
			err := json.Unmarshal([]byte(line), &event)
			require.NoError(a.t, err)
			if event.Type == sync.EventTypeComplete {
				return
			}
		}
	}
}
// remoteDirContent asserts that the remote directory relativeDir (relative to
// the remote root) contains exactly the entries named in expectedFiles.
//
// It first polls the workspace listing for up to 30 seconds, every 5 seconds,
// until the number of entries matches, then lists once more and checks that
// every expected name is present.
func (a *syncTest) remoteDirContent(ctx context.Context, relativeDir string, expectedFiles []string) {
	remoteDir := path.Join(a.remoteRoot, relativeDir)
	req := workspace.ListWorkspaceRequest{Path: remoteDir}

	// Wait until the listing has the expected number of entries.
	countMatches := func() bool {
		listed, err := a.w.Workspace.ListAll(ctx, req)
		require.NoError(a.t, err)
		return len(listed) == len(expectedFiles)
	}
	a.c.Eventually(countMatches, 30*time.Second, 5*time.Second)

	// Re-list and compare the actual paths against the expected names.
	listed, err := a.w.Workspace.ListAll(ctx, req)
	require.NoError(a.t, err)

	var actualFiles []string
	for i := range listed {
		actualFiles = append(actualFiles, listed[i].Path)
	}

	assert.Len(a.t, actualFiles, len(expectedFiles))
	for _, name := range expectedFiles {
		want := path.Join(a.remoteRoot, relativeDir, name)
		assert.Contains(a.t, actualFiles, want)
	}
}
// remoteFileContent polls the workspace-files API for up to 30 seconds, every
// 5 seconds, until the file at relativePath (relative to the remote root) has
// exactly expectedContent. A request error fails the test.
func (a *syncTest) remoteFileContent(ctx context.Context, relativePath, expectedContent string) {
	// The leading "/" is dropped so the path can be embedded in the URL.
	trimmed := strings.TrimLeft(path.Join(a.remoteRoot, relativePath), "/")
	urlPath := fmt.Sprintf("/api/2.0/workspace-files/%s", trimmed)

	apiClient, err := client.New(a.w.Config)
	require.NoError(a.t, err)

	var res []byte
	contentMatches := func() bool {
		err = apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &res)
		require.NoError(a.t, err)
		return string(res) == expectedContent
	}
	a.c.Eventually(contentMatches, 30*time.Second, 5*time.Second)
}
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
// remoteNotExist fails the test unless a Stat of relativePath through the
// workspace files client returns an error matching fs.ErrNotExist.
func (a *syncTest) remoteNotExist(ctx context.Context, relativePath string) {
	_, statErr := a.f.Stat(ctx, relativePath)
	require.ErrorIs(a.t, statErr, fs.ErrNotExist)
}
// remoteExists fails the test unless a Stat of relativePath through the
// workspace files client succeeds.
func (a *syncTest) remoteExists(ctx context.Context, relativePath string) {
	_, statErr := a.f.Stat(ctx, relativePath)
	require.NoError(a.t, statErr)
}
// touchFile writes the literal text "contents" to path through the workspace
// files client, creating parent directories as needed. The write bypasses the
// sync process, which lets tests simulate changes made directly in the
// workspace.
func (a *syncTest) touchFile(ctx context.Context, path string) {
	body := strings.NewReader("contents")
	writeErr := a.f.Write(ctx, path, body, filer.CreateParentDirectories)
	require.NoError(a.t, writeErr)
}
// objectType polls for up to 30 seconds, every 5 seconds, until the workspace
// object at relativePath (relative to the remote root) reports an object type
// whose string form equals expected. Lookup errors count as "not yet" and are
// retried.
func (a *syncTest) objectType(ctx context.Context, relativePath, expected string) {
	remotePath := path.Join(a.remoteRoot, relativePath)
	typeMatches := func() bool {
		info, err := a.w.Workspace.GetStatusByPath(ctx, remotePath)
		return err == nil && info.ObjectType.String() == expected
	}
	a.c.Eventually(typeMatches, 30*time.Second, 5*time.Second)
}
// language polls for up to 30 seconds, every 5 seconds, until the workspace
// object at relativePath (relative to the remote root) reports a language
// whose string form equals expected. Lookup errors count as "not yet" and are
// retried.
func (a *syncTest) language(ctx context.Context, relativePath, expected string) {
	remotePath := path.Join(a.remoteRoot, relativePath)
	languageMatches := func() bool {
		info, err := a.w.Workspace.GetStatusByPath(ctx, remotePath)
		return err == nil && info.Language.String() == expected
	}
	a.c.Eventually(languageMatches, 30*time.Second, 5*time.Second)
}
// snapshotContains asserts that the local sync snapshot file records the
// expected host and remote path, and that its LastModifiedTimes map holds
// exactly the entries named in files.
//
// Locating, reading and decoding the snapshot are hard requirements: if any
// of them fails the test stops there, because the checks below would
// otherwise dereference a nil snapshot.
func (a *syncTest) snapshotContains(files []string) {
	snapshotPath := filepath.Join(a.localRoot, ".databricks/sync-snapshots", sync.GetFileName(a.w.Config.Host, a.remoteRoot))
	require.FileExists(a.t, snapshotPath)

	f, err := os.Open(snapshotPath)
	require.NoError(a.t, err)
	defer f.Close()

	raw, err := io.ReadAll(f)
	require.NoError(a.t, err)

	var s *sync.Snapshot
	err = json.Unmarshal(raw, &s)
	require.NoError(a.t, err)
	// A JSON `null` document decodes without error but leaves s nil.
	require.NotNil(a.t, s)

	// Expected value first, so failure messages label the two sides correctly.
	assert.Equal(a.t, a.w.Config.Host, s.Host)
	assert.Equal(a.t, a.remoteRoot, s.RemotePath)
	for _, filePath := range files {
		_, ok := s.LastModifiedTimes[filePath]
		assert.True(a.t, ok, fmt.Sprintf("%s not in snapshot file: %v", filePath, s.LastModifiedTimes))
	}
	assert.Equal(a.t, len(files), len(s.LastModifiedTimes))
}
// TestSyncFullFileSync runs sync with --full and --watch and checks that
// creating, rewriting and deleting a local file is reflected in the remote
// directory.
func TestSyncFullFileSync(t *testing.T) {
	const (
		firstContent  = `{"statement": "Mi Gente"}`
		secondContent = `{"statement": "Young Dumb & Broke"}`
	)

	ctx := context.Background()
	st := setupSyncTest(t, "--full", "--watch")

	// The sync process creates .gitignore itself so that .databricks is not synced.
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))

	// Create a new, empty file.
	file := testfile.CreateFile(t, filepath.Join(st.localRoot, "foo.txt"))
	defer file.Close(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, "foo.txt", ".gitignore"))
	st.remoteFileContent(ctx, "foo.txt", "")

	// First write.
	file.Overwrite(t, firstContent)
	st.waitForCompletionMarker()
	st.remoteFileContent(ctx, "foo.txt", firstContent)

	// Second write.
	file.Overwrite(t, secondContent)
	st.waitForCompletionMarker()
	st.remoteFileContent(ctx, "foo.txt", secondContent)

	// Delete the file again.
	file.Remove(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
}
// TestSyncIncrementalFileSync runs sync with --watch (no --full) and checks
// that creating, rewriting and deleting a local file is reflected both in the
// remote directory and in the local snapshot file.
func TestSyncIncrementalFileSync(t *testing.T) {
	const (
		firstContent  = `{"statement": "Mi Gente"}`
		secondContent = `{"statement": "Young Dumb & Broke"}`
	)

	ctx := context.Background()
	st := setupSyncTest(t, "--watch")

	// The sync process creates .gitignore itself so that .databricks is not synced.
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))

	// Create a new, empty file.
	file := testfile.CreateFile(t, filepath.Join(st.localRoot, "foo.txt"))
	defer file.Close(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, "foo.txt", ".gitignore"))
	st.remoteFileContent(ctx, "foo.txt", "")
	st.snapshotContains(append(repoFiles, "foo.txt", ".gitignore"))

	// First write.
	file.Overwrite(t, firstContent)
	st.waitForCompletionMarker()
	st.remoteFileContent(ctx, "foo.txt", firstContent)

	// Second write.
	file.Overwrite(t, secondContent)
	st.waitForCompletionMarker()
	st.remoteFileContent(ctx, "foo.txt", secondContent)

	// Delete the file again.
	file.Remove(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
	st.snapshotContains(append(repoFiles, ".gitignore"))
}
func TestSyncNestedFolderSync(t *testing.T) {
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")
// .gitignore is created by the sync process to enforce .databricks is not synced
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
// New file
localFilePath := filepath.Join(assertSync.localRoot, "dir1/dir2/dir3/foo.txt")
err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
assert.NoError(t, err)
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "dir1"))
assertSync.remoteDirContent(ctx, "dir1", []string{"dir2"})
assertSync.remoteDirContent(ctx, "dir1/dir2", []string{"dir3"})
assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/dir2/dir3/foo.txt"))
// delete
f.Remove(t)
assertSync.waitForCompletionMarker()
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
assertSync.remoteNotExist(ctx, "dir1")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))
}
func TestSyncNestedFolderDoesntFailOnNonEmptyDirectory(t *testing.T) {
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")
// .gitignore is created by the sync process to enforce .databricks is not synced
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
// New file
localFilePath := filepath.Join(assertSync.localRoot, "dir1/dir2/dir3/foo.txt")
err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
assert.NoError(t, err)
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "dir1/dir2/dir3", []string{"foo.txt"})
// Add file to dir1 to simulate a user writing to the workspace directly.
assertSync.touchFile(ctx, "dir1/foo.txt")
// Remove original file.
f.Remove(t)
assertSync.waitForCompletionMarker()
// Sync should have removed these directories.
assertSync.remoteNotExist(ctx, "dir1/dir2/dir3")
assertSync.remoteNotExist(ctx, "dir1/dir2")
// Sync should have ignored not being able to delete dir1.
assertSync.remoteExists(ctx, "dir1/foo.txt")
assertSync.remoteExists(ctx, "dir1")
}
func TestSyncNestedSpacePlusAndHashAreEscapedSync(t *testing.T) {
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")
// .gitignore is created by the sync process to enforce .databricks is not synced
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
// New file
localFilePath := filepath.Join(assertSync.localRoot, "dir1/a b+c/c+d e/e+f g#i.txt")
err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
assert.NoError(t, err)
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "dir1"))
assertSync.remoteDirContent(ctx, "dir1", []string{"a b+c"})
assertSync.remoteDirContent(ctx, "dir1/a b+c", []string{"c+d e"})
assertSync.remoteDirContent(ctx, "dir1/a b+c/c+d e", []string{"e+f g#i.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", "dir1/a b+c/c+d e/e+f g#i.txt"))
// delete
f.Remove(t)
assertSync.waitForCompletionMarker()
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
assertSync.remoteNotExist(ctx, "dir1/a b+c/c+d e")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))
}
// This is a check for the edge case when a user does the following:
//
// 1. Add file foo/bar.txt
// 2. Delete foo/bar.txt (including the directory)
// 3. Add file foo
//
// In the above scenario sync should delete the empty folder and add foo to the remote
// file system
func TestSyncIncrementalFileOverwritesFolder(t *testing.T) {
ctx := context.Background()
assertSync := setupSyncTest(t, "--watch")
// create foo/bar.txt
localFilePath := filepath.Join(assertSync.localRoot, "foo/bar.txt")
err := os.MkdirAll(filepath.Dir(localFilePath), 0o755)
assert.NoError(t, err)
f := testfile.CreateFile(t, localFilePath)
defer f.Close(t)
assertSync.waitForCompletionMarker()
2023-02-20 14:41:37 +00:00
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
assertSync.remoteDirContent(ctx, "foo", []string{"bar.txt"})
assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo/bar.txt"))
// delete foo/bar.txt
f.Remove(t)
os.Remove(filepath.Join(assertSync.localRoot, "foo"))
assertSync.waitForCompletionMarker()
Add directory tracking to sync (#425) ## Changes This change replaces usage of the `repofiles` package with the `filer` package to consolidate WSFS code paths. The `repofiles` package implemented the following behavior. If a file at `foo/bar.txt` was created and removed, the directory `foo` was kept around because we do not perform directory tracking. If subsequently, a file at `foo` was created, it resulted in an `fs.ErrExist` because it is impossible to overwrite a directory. It would then perform a recursive delete of the path if this happened and retry the file write. To make this use case work without resorting to a recursive delete on conflict, we need to implement directory tracking as part of sync. The approach in this commit is as follows: 1. Maintain set of directories needed for current set of files. Compare to previous set of files. This results in mkdir of added directories and rmdir of removed directories. 2. Creation of new directories should happen prior to writing files. Otherwise, many file writes may race to create the same parent directories, resulting in additional API calls. Removal of existing directories should happen after removing files. 3. Making new directories can be deduped across common prefixes where only the longest prefix is created recursively. 4. Removing existing directories must happen sequentially, starting with the longest prefix. 5. Removal of directories is a best effort. It fails only if the directory is not empty, and if this happens we know something placed a file or directory manually, outside of sync. ## Tests * Existing integration tests pass (modified where it used to assert directories weren't cleaned up) * New integration test to confirm the inability to remove a directory doesn't fail the sync run
2023-06-12 11:44:00 +00:00
assertSync.remoteNotExist(ctx, "foo")
assertSync.snapshotContains(append(repoFiles, ".gitignore"))
f2 := testfile.CreateFile(t, filepath.Join(assertSync.localRoot, "foo"))
defer f2.Close(t)
assertSync.waitForCompletionMarker()
assertSync.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
assertSync.objectType(ctx, "foo", "FILE")
assertSync.snapshotContains(append(repoFiles, ".gitignore", "foo"))
}
// TestSyncIncrementalSyncPythonNotebookToFile checks that a local foo.py that
// starts with the notebook header is synced as the notebook "foo", that
// replacing the header turns it into the plain file "foo.py", and that
// deleting it removes it remotely.
func TestSyncIncrementalSyncPythonNotebookToFile(t *testing.T) {
	ctx := context.Background()
	st := setupSyncTest(t, "--watch")

	// Create a Python notebook.
	notebook := testfile.CreateFile(t, filepath.Join(st.localRoot, "foo.py"))
	defer notebook.Close(t)
	notebook.Overwrite(t, "# Databricks notebook source")

	// The notebook is uploaded without its extension.
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
	st.objectType(ctx, "foo", "NOTEBOOK")
	st.language(ctx, "foo", "PYTHON")
	st.snapshotContains(append(repoFiles, ".gitignore", "foo.py"))

	// Turn it into a plain Python file.
	notebook.Overwrite(t, "# No longer a python notebook")
	st.waitForCompletionMarker()
	st.objectType(ctx, "foo.py", "FILE")
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo.py"))
	st.snapshotContains(append(repoFiles, ".gitignore", "foo.py"))

	// Delete the plain Python file.
	notebook.Remove(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
	st.snapshotContains(append(repoFiles, ".gitignore"))
}
// TestSyncIncrementalSyncFileToPythonNotebook checks that a plain foo.py is
// synced as the file "foo.py" and that adding the notebook header turns the
// remote object into the Python notebook "foo".
func TestSyncIncrementalSyncFileToPythonNotebook(t *testing.T) {
	ctx := context.Background()
	st := setupSyncTest(t, "--watch")

	// Create a plain Python file.
	pyFile := testfile.CreateFile(t, filepath.Join(st.localRoot, "foo.py"))
	defer pyFile.Close(t)
	st.waitForCompletionMarker()

	// It is uploaded as a regular file.
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo.py"))
	st.objectType(ctx, "foo.py", "FILE")
	st.snapshotContains(append(repoFiles, ".gitignore", "foo.py"))

	// Convert it to a notebook.
	pyFile.Overwrite(t, "# Databricks notebook source")
	st.waitForCompletionMarker()
	st.objectType(ctx, "foo", "NOTEBOOK")
	st.language(ctx, "foo", "PYTHON")
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
	st.snapshotContains(append(repoFiles, ".gitignore", "foo.py"))
}
// TestSyncIncrementalSyncPythonNotebookDelete checks that a Python notebook
// synced as "foo" is removed remotely when the local foo.py is deleted.
func TestSyncIncrementalSyncPythonNotebookDelete(t *testing.T) {
	ctx := context.Background()
	st := setupSyncTest(t, "--watch")

	// Create a Python notebook.
	notebook := testfile.CreateFile(t, filepath.Join(st.localRoot, "foo.py"))
	defer notebook.Close(t)
	notebook.Overwrite(t, "# Databricks notebook source")
	st.waitForCompletionMarker()

	// The notebook is uploaded without its extension.
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore", "foo"))
	st.objectType(ctx, "foo", "NOTEBOOK")
	st.language(ctx, "foo", "PYTHON")

	// Delete the notebook.
	notebook.Remove(t)
	st.waitForCompletionMarker()
	st.remoteDirContent(ctx, "", append(repoFiles, ".gitignore"))
}
// TestSyncEnsureRemotePathIsUsableIfRepoDoesntExist checks that
// sync.EnsureRemotePathIsUsable reports a "does not exist" error for a path
// under /Repos that has not been created, and for paths nested below it.
func TestSyncEnsureRemotePathIsUsableIfRepoDoesntExist(t *testing.T) {
	const wantErr = " does not exist; please create it first"

	cloudEnv := testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(cloudEnv)

	wsc := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	me, err := wsc.CurrentUser.Me(ctx)
	require.NoError(t, err)

	// Build a repo path that has not been created.
	repoName := testutil.RandomName("doesnt-exist-")
	nonExistingRepoPath := fmt.Sprintf("/Repos/%s/%s", me.UserName, repoName)
	err = sync.EnsureRemotePathIsUsable(ctx, wsc, nonExistingRepoPath, nil)
	assert.ErrorContains(t, err, wantErr)

	// A path nested under that repo path should yield the same error.
	nestedPath := path.Join(nonExistingRepoPath, "nested/directory")
	err = sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
	assert.ErrorContains(t, err, wantErr)
}
// TestSyncEnsureRemotePathIsUsableIfRepoExists checks that
// sync.EnsureRemotePathIsUsable accepts both an existing repo path and a path
// nested below it, and that the nested path exists as a directory afterwards.
func TestSyncEnsureRemotePathIsUsableIfRepoExists(t *testing.T) {
	cloudEnv := testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(cloudEnv)

	wsc := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()
	_, remoteRepoPath := setupRepo(t, wsc, ctx)

	// The repo path itself is usable.
	repoErr := sync.EnsureRemotePathIsUsable(ctx, wsc, remoteRepoPath, nil)
	assert.NoError(t, repoErr)

	// So is a path nested under the repo.
	nestedPath := path.Join(remoteRepoPath, "nested/directory")
	nestedErr := sync.EnsureRemotePathIsUsable(ctx, wsc, nestedPath, nil)
	assert.NoError(t, nestedErr)

	// The nested path must now exist as a directory.
	info, statusErr := wsc.Workspace.GetStatusByPath(ctx, nestedPath)
	require.NoError(t, statusErr)
	require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType)
}
// TestSyncEnsureRemotePathIsUsableInWorkspace checks that
// sync.EnsureRemotePathIsUsable succeeds for a new path under the current
// user's /Users directory and that the path exists as a directory afterwards.
// The directory is deleted again when the test function returns.
func TestSyncEnsureRemotePathIsUsableInWorkspace(t *testing.T) {
	cloudEnv := testutil.GetEnvOrSkipTest(t, "CLOUD_ENV")
	t.Log(cloudEnv)

	wsc := databricks.Must(databricks.NewWorkspaceClient())
	ctx := context.Background()

	me, err := wsc.CurrentUser.Me(ctx)
	require.NoError(t, err)

	dirName := testutil.RandomName("ensure-path-exists-test-")
	remotePath := fmt.Sprintf("/Users/%s/%s", me.UserName, dirName)
	err = sync.EnsureRemotePathIsUsable(ctx, wsc, remotePath, me)
	assert.NoError(t, err)

	// Remove the directory once the test is done.
	defer func() {
		deleteErr := wsc.Workspace.Delete(ctx, workspace.Delete{Path: remotePath})
		assert.NoError(t, deleteErr)
	}()

	// The path must now exist as a directory.
	info, err := wsc.Workspace.GetStatusByPath(ctx, remotePath)
	require.NoError(t, err)
	require.Equal(t, workspace.ObjectTypeDirectory, info.ObjectType)
}