From 5273d0c51a1cd473557ef40d42e39ad10136f235 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 18 Oct 2023 12:20:43 +0200 Subject: [PATCH] Support Python wheels larger than 10MB (#879) ## Changes Previously we only supported uploading Python wheels smaller than 10mb due to using Workspace.Import API and `content ` field https://docs.databricks.com/api/workspace/workspace/import By switching to use `WorkspaceFilesClient` we overcome the limit because it uses POST body for the API instead. ## Tests `TestAccUploadArtifactFileToCorrectRemotePath` integration test passes ``` === RUN TestAccUploadArtifactFileToCorrectRemotePath artifacts_test.go:28: gcp 2023/10/17 15:24:04 INFO Using Google Credentials sdk=true helpers.go:356: Creating /Users/.../integration-test-wsfs-ekggbkcfdkid artifacts.Upload(test.whl): Uploading... 2023/10/17 15:24:06 INFO Using Google Credentials mutator=artifacts.Upload(test) sdk=true artifacts.Upload(test.whl): Upload succeeded helpers.go:362: Removing /Users/.../integration-test-wsfs-ekggbkcfdkid --- PASS: TestAccUploadArtifactFileToCorrectRemotePath (5.66s) PASS coverage: 14.9% of statements in ./... ok github.com/databricks/cli/internal 6.109s coverage: 14.9% of statements in ./... ``` --- bundle/artifacts/artifacts.go | 47 +++++------ bundle/artifacts/artifacts_test.go | 123 ----------------------------- internal/bundle/artifacts_test.go | 69 ++++++++++++++++ 3 files changed, 93 insertions(+), 146 deletions(-) delete mode 100644 bundle/artifacts/artifacts_test.go create mode 100644 internal/bundle/artifacts_test.go diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go index 0331adb7..e55ae4e8 100644 --- a/bundle/artifacts/artifacts.go +++ b/bundle/artifacts/artifacts.go @@ -1,9 +1,9 @@ package artifacts import ( + "bytes" "context" "crypto/sha256" - "encoding/base64" "errors" "fmt" "os" @@ -14,7 +14,7 @@ import ( "github.com/databricks/cli/bundle/artifacts/whl" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/databricks/cli/libs/filer" ) type mutatorFactory = func(name string) bundle.Mutator @@ -83,7 +83,7 @@ func BasicUpload(name string) bundle.Mutator { } func (m *basicUpload) Name() string { - return fmt.Sprintf("artifacts.Build(%s)", m.name) + return fmt.Sprintf("artifacts.Upload(%s)", m.name) } func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { @@ -96,7 +96,17 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact source is not configured: %s", m.name) } - err := uploadArtifact(ctx, artifact, b) + uploadPath, err := getUploadBasePath(b) + if err != nil { + return err + } + + client, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), uploadPath) + if err != nil { + return err + } + + err = uploadArtifact(ctx, artifact, uploadPath, client) if err != nil { return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err) } @@ -104,13 +114,14 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { return nil } -func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error { +func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, client filer.Filer) error { for i := range a.Files { f := &a.Files[i] if f.NeedsUpload() { filename := filepath.Base(f.Source) cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename)) - remotePath, err := uploadArtifactFile(ctx, f.Source, b) + + remotePath, err := uploadArtifactFile(ctx, f.Source, uploadPath, client) if err != nil { return err } @@ -125,32 +136,22 @@ func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) e } // Function to upload artifact file to Workspace -func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) { +func uploadArtifactFile(ctx context.Context, file string, uploadPath string, client filer.Filer) (string, error) { raw, err := os.ReadFile(file) if err != nil { return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err)) } - uploadPath, err := getUploadBasePath(b) - if err != nil { - return "", err - } - fileHash := sha256.Sum256(raw) - remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), filepath.Base(file)) - // Make sure target directory exists. - err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath)) + relPath := path.Join(fmt.Sprintf("%x", fileHash), filepath.Base(file)) + remotePath := path.Join(uploadPath, relPath) + + err = client.Mkdir(ctx, path.Dir(relPath)) if err != nil { - return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err) + return "", fmt.Errorf("unable to import %s: %w", remotePath, err) } - // Import to workspace. - err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{ - Path: remotePath, - Overwrite: true, - Format: workspace.ImportFormatAuto, - Content: base64.StdEncoding.EncodeToString(raw), - }) + err = client.Write(ctx, relPath, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories) if err != nil { return "", fmt.Errorf("unable to import %s: %w", remotePath, err) } diff --git a/bundle/artifacts/artifacts_test.go b/bundle/artifacts/artifacts_test.go deleted file mode 100644 index bbae44ef..00000000 --- a/bundle/artifacts/artifacts_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package artifacts - -import ( - "context" - "os" - "path/filepath" - "regexp" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/config" - "github.com/databricks/databricks-sdk-go/service/compute" - "github.com/databricks/databricks-sdk-go/service/workspace" - "github.com/stretchr/testify/require" -) - -func touchEmptyFile(t *testing.T, path string) { - err := os.MkdirAll(filepath.Dir(path), 0700) - require.NoError(t, err) - f, err := os.Create(path) - require.NoError(t, err) - f.Close() -} - -type MockWorkspaceService struct { -} - -// Delete implements workspace.WorkspaceService. -func (MockWorkspaceService) Delete(ctx context.Context, request workspace.Delete) error { - panic("unimplemented") -} - -// Export implements workspace.WorkspaceService. -func (MockWorkspaceService) Export(ctx context.Context, request workspace.ExportRequest) (*workspace.ExportResponse, error) { - panic("unimplemented") -} - -// GetStatus implements workspace.WorkspaceService. -func (MockWorkspaceService) GetStatus(ctx context.Context, request workspace.GetStatusRequest) (*workspace.ObjectInfo, error) { - panic("unimplemented") -} - -// Import implements workspace.WorkspaceService. -func (MockWorkspaceService) Import(ctx context.Context, request workspace.Import) error { - return nil -} - -// List implements workspace.WorkspaceService. -func (MockWorkspaceService) List(ctx context.Context, request workspace.ListWorkspaceRequest) (*workspace.ListResponse, error) { - panic("unimplemented") -} - -// Mkdirs implements workspace.WorkspaceService. -func (MockWorkspaceService) Mkdirs(ctx context.Context, request workspace.Mkdirs) error { - return nil -} - -// GetPermissionLevels implements workspace.WorkspaceService. -func (MockWorkspaceService) GetPermissionLevels( - ctx context.Context, - request workspace.GetWorkspaceObjectPermissionLevelsRequest, -) (*workspace.GetWorkspaceObjectPermissionLevelsResponse, error) { - panic("unimplemented") -} - -// GetPermissions implements workspace.WorkspaceService. -func (MockWorkspaceService) GetPermissions( - ctx context.Context, - request workspace.GetWorkspaceObjectPermissionsRequest, -) (*workspace.WorkspaceObjectPermissions, error) { - panic("unimplemented") -} - -// SetPermissions implements workspace.WorkspaceService. -func (MockWorkspaceService) SetPermissions( - ctx context.Context, - request workspace.WorkspaceObjectPermissionsRequest, -) (*workspace.WorkspaceObjectPermissions, error) { - panic("unimplemented") -} - -// UpdatePermissions implements workspace.WorkspaceService. -func (MockWorkspaceService) UpdatePermissions( - ctx context.Context, - request workspace.WorkspaceObjectPermissionsRequest, -) (*workspace.WorkspaceObjectPermissions, error) { - panic("unimplemented") -} - -func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) { - dir := t.TempDir() - whlPath := filepath.Join(dir, "dist", "test.whl") - touchEmptyFile(t, whlPath) - b := &bundle.Bundle{ - Config: config.Root{ - Path: dir, - Bundle: config.Bundle{ - Target: "whatever", - }, - Workspace: config.Workspace{ - ArtifactsPath: "/Users/test@databricks.com/whatever", - }, - }, - } - - b.WorkspaceClient().Workspace.WithImpl(MockWorkspaceService{}) - artifact := &config.Artifact{ - Type: "whl", - Files: []config.ArtifactFile{ - { - Source: whlPath, - Libraries: []*compute.Library{ - {Whl: "dist\\test.whl"}, - }, - }, - }, - } - - err := uploadArtifact(context.Background(), artifact, b) - require.NoError(t, err) - require.Regexp(t, regexp.MustCompile("/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].RemotePath) - require.Regexp(t, regexp.MustCompile("/Workspace/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].Libraries[0].Whl) -} diff --git a/internal/bundle/artifacts_test.go b/internal/bundle/artifacts_test.go new file mode 100644 index 00000000..1e77cae9 --- /dev/null +++ b/internal/bundle/artifacts_test.go @@ -0,0 +1,69 @@ +package bundle + +import ( + "context" + "os" + "path" + "path/filepath" + "regexp" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/artifacts" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/internal" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/require" +) + +func touchEmptyFile(t *testing.T, path string) { + err := os.MkdirAll(filepath.Dir(path), 0700) + require.NoError(t, err) + f, err := os.Create(path) + require.NoError(t, err) + f.Close() +} + +func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) { + t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV")) + + dir := t.TempDir() + whlPath := filepath.Join(dir, "dist", "test.whl") + touchEmptyFile(t, whlPath) + + artifact := &config.Artifact{ + Type: "whl", + Files: []config.ArtifactFile{ + { + Source: whlPath, + Libraries: []*compute.Library{ + {Whl: "dist\\test.whl"}, + }, + }, + }, + } + + w := databricks.Must(databricks.NewWorkspaceClient()) + wsDir := internal.TemporaryWorkspaceDir(t, w) + + b := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Bundle: config.Bundle{ + Target: "whatever", + }, + Workspace: config.Workspace{ + ArtifactsPath: wsDir, + }, + Artifacts: config.Artifacts{ + "test": artifact, + }, + }, + } + + err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test")) + require.NoError(t, err) + require.Regexp(t, regexp.MustCompile(path.Join(wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].RemotePath) + require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].Libraries[0].Whl) +}