diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go index e703668e..dd261d3b 100644 --- a/bundle/artifacts/artifacts.go +++ b/bundle/artifacts/artifacts.go @@ -3,7 +3,6 @@ package artifacts import ( "bytes" "context" - "crypto/sha256" "errors" "fmt" "os" @@ -62,13 +61,13 @@ func (m *basicBuild) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Building...", m.name)) + cmdio.LogString(ctx, fmt.Sprintf("Building %s...", m.name)) out, err := artifact.Build(ctx) if err != nil { - return fmt.Errorf("artifacts.Build(%s): %w, output: %s", m.name, err, out) + return fmt.Errorf("build for %s failed, error: %w, output: %s", m.name, err, out) } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Build succeeded", m.name)) + cmdio.LogString(ctx, "Build succeeded") return nil } @@ -108,7 +107,7 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { err = uploadArtifact(ctx, artifact, uploadPath, client) if err != nil { - return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err) + return fmt.Errorf("upload for %s failed, error: %w", m.name, err) } return nil @@ -119,15 +118,14 @@ func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, f := &a.Files[i] if f.NeedsUpload() { filename := filepath.Base(f.Source) - cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename)) + cmdio.LogString(ctx, fmt.Sprintf("Uploading %s...", filename)) - remotePath, err := uploadArtifactFile(ctx, f.Source, uploadPath, client) + err := uploadArtifactFile(ctx, f.Source, client) if err != nil { return err } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Upload succeeded", filename)) - - f.RemotePath = remotePath + cmdio.LogString(ctx, "Upload succeeded") + f.RemotePath = path.Join(uploadPath, filepath.Base(f.Source)) } } @@ -136,27 +134,19 @@ func uploadArtifact(ctx 
context.Context, a *config.Artifact, uploadPath string, } // Function to upload artifact file to Workspace -func uploadArtifactFile(ctx context.Context, file string, uploadPath string, client filer.Filer) (string, error) { +func uploadArtifactFile(ctx context.Context, file string, client filer.Filer) error { raw, err := os.ReadFile(file) if err != nil { - return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err)) + return fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err)) } - fileHash := sha256.Sum256(raw) - relPath := path.Join(fmt.Sprintf("%x", fileHash), filepath.Base(file)) - remotePath := path.Join(uploadPath, relPath) - - err = client.Mkdir(ctx, path.Dir(relPath)) + filename := filepath.Base(file) + err = client.Write(ctx, filename, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories) if err != nil { - return "", fmt.Errorf("unable to import %s: %w", remotePath, err) + return fmt.Errorf("unable to import %s: %w", filename, err) } - err = client.Write(ctx, relPath, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories) - if err != nil { - return "", fmt.Errorf("unable to import %s: %w", remotePath, err) - } - - return remotePath, nil + return nil } func getUploadBasePath(b *bundle.Bundle) (string, error) { diff --git a/bundle/artifacts/whl/autodetect.go b/bundle/artifacts/whl/autodetect.go index 29031e86..7c1c59d4 100644 --- a/bundle/artifacts/whl/autodetect.go +++ b/bundle/artifacts/whl/autodetect.go @@ -32,17 +32,17 @@ func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) error { log.Infof(ctx, "No local wheel tasks in databricks.yml config, skipping auto detect") return nil } - cmdio.LogString(ctx, "artifacts.whl.AutoDetect: Detecting Python wheel project...") + cmdio.LogString(ctx, "Detecting Python wheel project...") // checking if there is setup.py in the bundle root setupPy := filepath.Join(b.Config.Path, "setup.py") _, err := os.Stat(setupPy) if err != nil { - 
cmdio.LogString(ctx, "artifacts.whl.AutoDetect: No Python wheel project found at bundle root folder") + cmdio.LogString(ctx, "No Python wheel project found at bundle root folder") return nil } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.AutoDetect: Found Python wheel project at %s", b.Config.Path)) + cmdio.LogString(ctx, fmt.Sprintf("Found Python wheel project at %s", b.Config.Path)) module := extractModuleName(setupPy) if b.Config.Artifacts == nil { diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go index 6ebc925f..c1e7e8fa 100644 --- a/bundle/artifacts/whl/build.go +++ b/bundle/artifacts/whl/build.go @@ -32,7 +32,7 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Building...", m.name)) + cmdio.LogString(ctx, fmt.Sprintf("Building %s...", m.name)) dir := artifact.Path @@ -42,13 +42,13 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { out, err := artifact.Build(ctx) if err != nil { - return fmt.Errorf("artifacts.whl.Build(%s): Failed %w, output: %s", m.name, err, out) + return fmt.Errorf("build failed %s, error: %w, output: %s", m.name, err, out) } - cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Build succeeded", m.name)) + cmdio.LogString(ctx, "Build succeeded") wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") if len(wheels) == 0 { - return fmt.Errorf("artifacts.whl.Build(%s): cannot find built wheel in %s", m.name, dir) + return fmt.Errorf("cannot find built wheel in %s for package %s", dir, m.name) } for _, wheel := range wheels { artifact.Files = append(artifact.Files, config.ArtifactFile{ diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go index 1c0e9857..dedecc30 100644 --- a/bundle/artifacts/whl/infer.go +++ b/bundle/artifacts/whl/infer.go @@ -18,6 +18,21 @@ func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) 
error { if err != nil { return err } + + // Note: using the --build-number (build tag) flag does not help with re-installing + // libraries on all-purpose clusters. The reason is that `pip` ignores the build tag + // when upgrading the library and only looks at the wheel version. + // The build tag is only used for sorting the versions, and the one with the higher build tag takes priority when installed. + // This only works if the library is not installed yet. + // See https://github.com/pypa/pip/blob/a15dd75d98884c94a77d349b800c7c755d8c34e4/src/pip/_internal/index/package_finder.py#L522-L556 + // https://github.com/pypa/pip/issues/4781 + // + // Thus, the only way to reinstall the library on an all-purpose cluster is to increase the wheel version manually or + // use automatic version generation, e.g. + // setup( + // version=datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S"), + // ... + //) artifact.BuildCommand = fmt.Sprintf("%s setup.py bdist_wheel", py) return nil diff --git a/internal/bundle/artifacts_test.go b/internal/bundle/artifacts_test.go index 689a4b4b..71f91fde 100644 --- a/internal/bundle/artifacts_test.go +++ b/internal/bundle/artifacts_test.go @@ -64,6 +64,6 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) { err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test")) require.NoError(t, err) - require.Regexp(t, regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/[a-z0-9]+/test\.whl`)), artifact.Files[0].RemotePath) - require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/[a-z0-9]+/test\.whl`)), artifact.Files[0].Libraries[0].Whl) + require.Regexp(t, regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), artifact.Files[0].RemotePath) + require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), artifact.Files[0].Libraries[0].Whl) }