mirror of https://github.com/databricks/cli.git
Support Python wheels larger than 10MB (#879)
## Changes Previously we only supported uploading Python wheels smaller than 10mb due to using Workspace.Import API and `content ` field https://docs.databricks.com/api/workspace/workspace/import By switching to use `WorkspaceFilesClient` we overcome the limit because it uses POST body for the API instead. ## Tests `TestAccUploadArtifactFileToCorrectRemotePath` integration test passes ``` === RUN TestAccUploadArtifactFileToCorrectRemotePath artifacts_test.go:28: gcp 2023/10/17 15:24:04 INFO Using Google Credentials sdk=true helpers.go:356: Creating /Users/.../integration-test-wsfs-ekggbkcfdkid artifacts.Upload(test.whl): Uploading... 2023/10/17 15:24:06 INFO Using Google Credentials mutator=artifacts.Upload(test) sdk=true artifacts.Upload(test.whl): Upload succeeded helpers.go:362: Removing /Users/.../integration-test-wsfs-ekggbkcfdkid --- PASS: TestAccUploadArtifactFileToCorrectRemotePath (5.66s) PASS coverage: 14.9% of statements in ./... ok github.com/databricks/cli/internal 6.109s coverage: 14.9% of statements in ./... ```
This commit is contained in:
parent
1b992c0c1c
commit
5273d0c51a
|
@ -1,9 +1,9 @@
|
|||
package artifacts
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
@ -14,7 +14,7 @@ import (
|
|||
"github.com/databricks/cli/bundle/artifacts/whl"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/libs/cmdio"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
"github.com/databricks/cli/libs/filer"
|
||||
)
|
||||
|
||||
type mutatorFactory = func(name string) bundle.Mutator
|
||||
|
@ -83,7 +83,7 @@ func BasicUpload(name string) bundle.Mutator {
|
|||
}
|
||||
|
||||
func (m *basicUpload) Name() string {
|
||||
return fmt.Sprintf("artifacts.Build(%s)", m.name)
|
||||
return fmt.Sprintf("artifacts.Upload(%s)", m.name)
|
||||
}
|
||||
|
||||
func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||
|
@ -96,7 +96,17 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
|||
return fmt.Errorf("artifact source is not configured: %s", m.name)
|
||||
}
|
||||
|
||||
err := uploadArtifact(ctx, artifact, b)
|
||||
uploadPath, err := getUploadBasePath(b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
client, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), uploadPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = uploadArtifact(ctx, artifact, uploadPath, client)
|
||||
if err != nil {
|
||||
return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err)
|
||||
}
|
||||
|
@ -104,13 +114,14 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error {
|
||||
func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, client filer.Filer) error {
|
||||
for i := range a.Files {
|
||||
f := &a.Files[i]
|
||||
if f.NeedsUpload() {
|
||||
filename := filepath.Base(f.Source)
|
||||
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename))
|
||||
remotePath, err := uploadArtifactFile(ctx, f.Source, b)
|
||||
|
||||
remotePath, err := uploadArtifactFile(ctx, f.Source, uploadPath, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -125,32 +136,22 @@ func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) e
|
|||
}
|
||||
|
||||
// Function to upload artifact file to Workspace
|
||||
func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) {
|
||||
func uploadArtifactFile(ctx context.Context, file string, uploadPath string, client filer.Filer) (string, error) {
|
||||
raw, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err))
|
||||
}
|
||||
|
||||
uploadPath, err := getUploadBasePath(b)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fileHash := sha256.Sum256(raw)
|
||||
remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), filepath.Base(file))
|
||||
// Make sure target directory exists.
|
||||
err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath))
|
||||
relPath := path.Join(fmt.Sprintf("%x", fileHash), filepath.Base(file))
|
||||
remotePath := path.Join(uploadPath, relPath)
|
||||
|
||||
err = client.Mkdir(ctx, path.Dir(relPath))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err)
|
||||
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
|
||||
}
|
||||
|
||||
// Import to workspace.
|
||||
err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{
|
||||
Path: remotePath,
|
||||
Overwrite: true,
|
||||
Format: workspace.ImportFormatAuto,
|
||||
Content: base64.StdEncoding.EncodeToString(raw),
|
||||
})
|
||||
err = client.Write(ctx, relPath, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
|
||||
}
|
||||
|
|
|
@ -1,123 +0,0 @@
|
|||
package artifacts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func touchEmptyFile(t *testing.T, path string) {
|
||||
err := os.MkdirAll(filepath.Dir(path), 0700)
|
||||
require.NoError(t, err)
|
||||
f, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
f.Close()
|
||||
}
|
||||
|
||||
type MockWorkspaceService struct {
|
||||
}
|
||||
|
||||
// Delete implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) Delete(ctx context.Context, request workspace.Delete) error {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Export implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) Export(ctx context.Context, request workspace.ExportRequest) (*workspace.ExportResponse, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// GetStatus implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) GetStatus(ctx context.Context, request workspace.GetStatusRequest) (*workspace.ObjectInfo, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Import implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) Import(ctx context.Context, request workspace.Import) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// List implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) List(ctx context.Context, request workspace.ListWorkspaceRequest) (*workspace.ListResponse, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Mkdirs implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) Mkdirs(ctx context.Context, request workspace.Mkdirs) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetPermissionLevels implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) GetPermissionLevels(
|
||||
ctx context.Context,
|
||||
request workspace.GetWorkspaceObjectPermissionLevelsRequest,
|
||||
) (*workspace.GetWorkspaceObjectPermissionLevelsResponse, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// GetPermissions implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) GetPermissions(
|
||||
ctx context.Context,
|
||||
request workspace.GetWorkspaceObjectPermissionsRequest,
|
||||
) (*workspace.WorkspaceObjectPermissions, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// SetPermissions implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) SetPermissions(
|
||||
ctx context.Context,
|
||||
request workspace.WorkspaceObjectPermissionsRequest,
|
||||
) (*workspace.WorkspaceObjectPermissions, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// UpdatePermissions implements workspace.WorkspaceService.
|
||||
func (MockWorkspaceService) UpdatePermissions(
|
||||
ctx context.Context,
|
||||
request workspace.WorkspaceObjectPermissionsRequest,
|
||||
) (*workspace.WorkspaceObjectPermissions, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
func TestUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
whlPath := filepath.Join(dir, "dist", "test.whl")
|
||||
touchEmptyFile(t, whlPath)
|
||||
b := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Bundle: config.Bundle{
|
||||
Target: "whatever",
|
||||
},
|
||||
Workspace: config.Workspace{
|
||||
ArtifactsPath: "/Users/test@databricks.com/whatever",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
b.WorkspaceClient().Workspace.WithImpl(MockWorkspaceService{})
|
||||
artifact := &config.Artifact{
|
||||
Type: "whl",
|
||||
Files: []config.ArtifactFile{
|
||||
{
|
||||
Source: whlPath,
|
||||
Libraries: []*compute.Library{
|
||||
{Whl: "dist\\test.whl"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := uploadArtifact(context.Background(), artifact, b)
|
||||
require.NoError(t, err)
|
||||
require.Regexp(t, regexp.MustCompile("/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].RemotePath)
|
||||
require.Regexp(t, regexp.MustCompile("/Workspace/Users/test@databricks.com/whatever/.internal/[a-z0-9]+/test.whl"), artifact.Files[0].Libraries[0].Whl)
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package bundle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/artifacts"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/internal"
|
||||
"github.com/databricks/databricks-sdk-go"
|
||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func touchEmptyFile(t *testing.T, path string) {
|
||||
err := os.MkdirAll(filepath.Dir(path), 0700)
|
||||
require.NoError(t, err)
|
||||
f, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
f.Close()
|
||||
}
|
||||
|
||||
func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
||||
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))
|
||||
|
||||
dir := t.TempDir()
|
||||
whlPath := filepath.Join(dir, "dist", "test.whl")
|
||||
touchEmptyFile(t, whlPath)
|
||||
|
||||
artifact := &config.Artifact{
|
||||
Type: "whl",
|
||||
Files: []config.ArtifactFile{
|
||||
{
|
||||
Source: whlPath,
|
||||
Libraries: []*compute.Library{
|
||||
{Whl: "dist\\test.whl"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
w := databricks.Must(databricks.NewWorkspaceClient())
|
||||
wsDir := internal.TemporaryWorkspaceDir(t, w)
|
||||
|
||||
b := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Bundle: config.Bundle{
|
||||
Target: "whatever",
|
||||
},
|
||||
Workspace: config.Workspace{
|
||||
ArtifactsPath: wsDir,
|
||||
},
|
||||
Artifacts: config.Artifacts{
|
||||
"test": artifact,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test"))
|
||||
require.NoError(t, err)
|
||||
require.Regexp(t, regexp.MustCompile(path.Join(wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].RemotePath)
|
||||
require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].Libraries[0].Whl)
|
||||
}
|
Loading…
Reference in New Issue