Support Python wheels larger than 10MB (#879)

## Changes
Previously we only supported uploading Python wheels smaller than 10mb
due to using Workspace.Import API and `content ` field
https://docs.databricks.com/api/workspace/workspace/import

By switching to use `WorkspaceFilesClient` we overcome the limit because
it uses POST body for the API instead.

## Tests
`TestAccUploadArtifactFileToCorrectRemotePath` integration test passes

```
=== RUN   TestAccUploadArtifactFileToCorrectRemotePath
    artifacts_test.go:28: gcp
2023/10/17 15:24:04 INFO Using Google Credentials sdk=true
    helpers.go:356: Creating /Users/.../integration-test-wsfs-ekggbkcfdkid
artifacts.Upload(test.whl): Uploading...
2023/10/17 15:24:06 INFO Using Google Credentials mutator=artifacts.Upload(test) sdk=true
artifacts.Upload(test.whl): Upload succeeded
    helpers.go:362: Removing /Users/.../integration-test-wsfs-ekggbkcfdkid
--- PASS: TestAccUploadArtifactFileToCorrectRemotePath (5.66s)
PASS
coverage: 14.9% of statements in ./...
ok      github.com/databricks/cli/internal      6.109s  coverage: 14.9% of statements in ./...
```
This commit is contained in:
Andrew Nester 2023-10-18 12:20:43 +02:00 committed by GitHub
parent 1b992c0c1c
commit 5273d0c51a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 93 additions and 146 deletions

View File

@ -1,9 +1,9 @@
package artifacts
import (
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"os"
@ -14,7 +14,7 @@ import (
"github.com/databricks/cli/bundle/artifacts/whl"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/databricks/cli/libs/filer"
)
type mutatorFactory = func(name string) bundle.Mutator
@ -83,7 +83,7 @@ func BasicUpload(name string) bundle.Mutator {
}
func (m *basicUpload) Name() string {
return fmt.Sprintf("artifacts.Build(%s)", m.name)
return fmt.Sprintf("artifacts.Upload(%s)", m.name)
}
func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
@ -96,7 +96,17 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
return fmt.Errorf("artifact source is not configured: %s", m.name)
}
err := uploadArtifact(ctx, artifact, b)
uploadPath, err := getUploadBasePath(b)
if err != nil {
return err
}
client, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), uploadPath)
if err != nil {
return err
}
err = uploadArtifact(ctx, artifact, uploadPath, client)
if err != nil {
return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err)
}
@ -104,13 +114,14 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
return nil
}
func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error {
func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, client filer.Filer) error {
for i := range a.Files {
f := &a.Files[i]
if f.NeedsUpload() {
filename := filepath.Base(f.Source)
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename))
remotePath, err := uploadArtifactFile(ctx, f.Source, b)
remotePath, err := uploadArtifactFile(ctx, f.Source, uploadPath, client)
if err != nil {
return err
}
@ -125,32 +136,22 @@ func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) e
}
// Function to upload artifact file to Workspace
func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) {
func uploadArtifactFile(ctx context.Context, file string, uploadPath string, client filer.Filer) (string, error) {
raw, err := os.ReadFile(file)
if err != nil {
return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err))
}
uploadPath, err := getUploadBasePath(b)
if err != nil {
return "", err
}
fileHash := sha256.Sum256(raw)
remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), filepath.Base(file))
// Make sure target directory exists.
err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath))
relPath := path.Join(fmt.Sprintf("%x", fileHash), filepath.Base(file))
remotePath := path.Join(uploadPath, relPath)
err = client.Mkdir(ctx, path.Dir(relPath))
if err != nil {
return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err)
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
}
// Import to workspace.
err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{
Path: remotePath,
Overwrite: true,
Format: workspace.ImportFormatAuto,
Content: base64.StdEncoding.EncodeToString(raw),
})
err = client.Write(ctx, relPath, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories)
if err != nil {
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
}

View File

@ -1,123 +0,0 @@
package artifacts
import (
"context"
"os"
"path/filepath"
"regexp"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/stretchr/testify/require"
)
func touchEmptyFile(t *testing.T, path string) {
err := os.MkdirAll(filepath.Dir(path), 0700)
require.NoError(t, err)
f, err := os.Create(path)
require.NoError(t, err)
f.Close()
}
type MockWorkspaceService struct {
}
// Delete implements workspace.WorkspaceService.
func (MockWorkspaceService) Delete(ctx context.Context, request workspace.Delete) error {
panic("unimplemented")
}
// Export implements workspace.WorkspaceService.
func (MockWorkspaceService) Export(ctx context.Context, request workspace.ExportRequest) (*workspace.ExportResponse, error) {
panic("unimplemented")
}
// GetStatus implements workspace.WorkspaceService.
func (MockWorkspaceService) GetStatus(ctx context.Context, request workspace.GetStatusRequest) (*workspace.ObjectInfo, error) {
panic("unimplemented")
}
// Import implements workspace.WorkspaceService.
func (MockWorkspaceService) Import(ctx context.Context, request workspace.Import) error {
return nil
}
// List implements workspace.WorkspaceService.
func (MockWorkspaceService) List(ctx context.Context, request workspace.ListWorkspaceRequest) (*workspace.ListResponse, error) {
panic("unimplemented")
}
// Mkdirs implements workspace.WorkspaceService.
func (MockWorkspaceService) Mkdirs(ctx context.Context, request workspace.Mkdirs) error {
return nil
}
// GetPermissionLevels implements workspace.WorkspaceService.
func (MockWorkspaceService) GetPermissionLevels(
ctx context.Context,
request workspace.GetWorkspaceObjectPermissionLevelsRequest,
) (*workspace.GetWorkspaceObjectPermissionLevelsResponse, error) {
panic("unimplemented")
}
// GetPermissions implements workspace.WorkspaceService.
func (MockWorkspaceService) GetPermissions(
ctx context.Context,
request workspace.GetWorkspaceObjectPermissionsRequest,
) (*workspace.WorkspaceObjectPermissions, error) {
panic("unimplemented")
}
// SetPermissions implements workspace.WorkspaceService.
func (MockWorkspaceService) SetPermissions(
ctx context.Context,
request workspace.WorkspaceObjectPermissionsRequest,
) (*workspace.WorkspaceObjectPermissions, error) {
panic("unimplemented")
}
// UpdatePermissions implements workspace.WorkspaceService.
func (MockWorkspaceService) UpdatePermissions(
ctx context.Context,
request workspace.WorkspaceObjectPermissionsRequest,
) (*workspace.WorkspaceObjectPermissions, error) {
panic("unimplemented")
}
func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) {
dir := t.TempDir()
whlPath := filepath.Join(dir, "dist", "test.whl")
touchEmptyFile(t, whlPath)
b := &bundle.Bundle{
Config: config.Root{
Path: dir,
Bundle: config.Bundle{
Target: "whatever",
},
Workspace: config.Workspace{
ArtifactsPath: "/Users/test@databricks.com/whatever",
},
},
}
b.WorkspaceClient().Workspace.WithImpl(MockWorkspaceService{})
artifact := &config.Artifact{
Type: "whl",
Files: []config.ArtifactFile{
{
Source: whlPath,
Libraries: []*compute.Library{
{Whl: "dist\\test.whl"},
},
},
},
}
err := uploadArtifact(context.Background(), artifact, b)
require.NoError(t, err)
require.Regexp(t, regexp.MustCompile("/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].RemotePath)
require.Regexp(t, regexp.MustCompile("/Workspace/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].Libraries[0].Whl)
}

View File

@ -0,0 +1,69 @@
package bundle
import (
"context"
"os"
"path"
"path/filepath"
"regexp"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/artifacts"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/internal"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/stretchr/testify/require"
)
func touchEmptyFile(t *testing.T, path string) {
err := os.MkdirAll(filepath.Dir(path), 0700)
require.NoError(t, err)
f, err := os.Create(path)
require.NoError(t, err)
f.Close()
}
func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))
dir := t.TempDir()
whlPath := filepath.Join(dir, "dist", "test.whl")
touchEmptyFile(t, whlPath)
artifact := &config.Artifact{
Type: "whl",
Files: []config.ArtifactFile{
{
Source: whlPath,
Libraries: []*compute.Library{
{Whl: "dist\\test.whl"},
},
},
},
}
w := databricks.Must(databricks.NewWorkspaceClient())
wsDir := internal.TemporaryWorkspaceDir(t, w)
b := &bundle.Bundle{
Config: config.Root{
Path: dir,
Bundle: config.Bundle{
Target: "whatever",
},
Workspace: config.Workspace{
ArtifactsPath: wsDir,
},
Artifacts: config.Artifacts{
"test": artifact,
},
},
}
err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test"))
require.NoError(t, err)
require.Regexp(t, regexp.MustCompile(path.Join(wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].RemotePath)
require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].Libraries[0].Whl)
}