Add libs/patchwheel: insert dynamic version in whl (#2427)

## Changes
- New library to patch an existing whl with a dynamic version. The version is
based on the mtime of the original wheel.
- New hidden command "selftest patchwhl" (for acceptance test).

## Why
Part of the project to deprecate dynamic version in setup.py template
(#1034). This would enable switching default-python to pyproject.toml
without dependency on setuptools-specific sections.

The dynamic version is needed to ensure that rebuilt wheels are picked up by
the user's code during development.

## Tests
- New unit tests that use uv to create envs on various versions and
install (and re-install) patched wheels there.
- New acceptance test that patches prebuilt whl and records the result
fully unzipped.
- New acceptance test helper setmtime.py to set mtime with nanosecond
precision cross-platform.
This commit is contained in:
Denis Bilenko 2025-03-06 11:28:30 +01:00 committed by GitHub
parent c38efaa5c4
commit edf37e7d0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 1000 additions and 0 deletions

17
acceptance/bin/setmtime.py Executable file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env python3
"""
Cross-platform set mtime with nanosecond precision.

Usage: setmtime.py <timestamp> <filenames>

<timestamp> is "YYYY-MM-DD HH:MM:SS[.fraction]" interpreted as UTC.
The optional fraction may have up to 9 digits (nanoseconds).
"""

import datetime
import os
import sys


def parse_timestamp_ns(timestamp):
    """Convert "YYYY-MM-DD HH:MM:SS[.fraction]" (UTC) to integer nanoseconds since the epoch.

    Raises ValueError if the date part does not match the expected format or
    the fraction is not 0-9 decimal digits.
    """
    seconds_part, _, fraction = timestamp.partition(".")
    if len(fraction) > 9 or (fraction and not fraction.isdecimal()):
        raise ValueError(f"invalid fractional seconds in timestamp: {timestamp!r}")

    dt = datetime.datetime.strptime(seconds_part, "%Y-%m-%d %H:%M:%S").replace(tzinfo=datetime.timezone.utc)

    # Right-pad so that ".5" means 500_000_000 ns; a missing fraction becomes 0.
    nanos = int(fraction.ljust(9, "0"))

    # Integer arithmetic only: a float timestamp cannot hold nanoseconds exactly.
    return int(dt.timestamp()) * 10**9 + nanos


def main(argv):
    """Set both atime and mtime of every file in argv[2:] to the timestamp in argv[1]."""
    if len(argv) < 2:
        sys.exit(__doc__.strip())

    mtime_ns = parse_timestamp_ns(argv[1])
    for filename in argv[2:]:
        os.utime(filename, ns=(mtime_ns, mtime_ns))


if __name__ == "__main__":
    main(sys.argv)

View File

@ -0,0 +1,32 @@
=== Test prebuilt wheel:
>>> setmtime.py 2025-03-05 15:07:33.123456700 my_test_code-0.0.1-py3-none-any.whl
>>> [CLI] selftest patchwhl my_test_code-0.0.1-py3-none-any.whl
Warn: Patched whl: my_test_code-0.0.1-py3-none-any.whl -> my_test_code-0.0.1+1741187253123456700-py3-none-any.whl
>>> diff.py original output
--- original/my_test_code-0.0.1+1741187253123456700.dist-info/METADATA
+++ output/my_test_code-0.0.1+1741187253123456700.dist-info/METADATA
@@ -1,5 +1,5 @@
Metadata-Version: 2.1
Name: my-test-code
-Version: 0.0.1
+Version: 0.0.1+1741187253123456700
Summary: my test wheel
Home-page: https://databricks.com
--- original/my_test_code-0.0.1+1741187253123456700.dist-info/RECORD
+++ output/my_test_code-0.0.1+1741187253123456700.dist-info/RECORD
@@ -1,7 +1,7 @@
src/__init__.py,sha256=BRmKeYehopKv4NG_SFa7t6wn248RrPHJivu7DM1R-Rw,48
src/__main__.py,sha256=8TtsnLsaJEM35Y4L8ocrv-qfxusgYpRL2HPyYiabHng,242
-my_test_code-0.0.1.dist-info/METADATA,sha256=6fyVq4hexXGUP_J2mB1CI-ijZ6CenvKNIlHx0bKPRJ0,197
-my_test_code-0.0.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-my_test_code-0.0.1.dist-info/entry_points.txt,sha256=oDWOW9SsBlk4Uejj1ftYPBxfhJ5ZJctb4JOUIG1rc-4,34
-my_test_code-0.0.1.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
-my_test_code-0.0.1.dist-info/RECORD,,
+my_test_code-0.0.1+1741187253123456700.dist-info/METADATA,sha256=H99P2vEwB_hBVPNtPwsXZotaDQzmWEGeSlOtMzWe62U,217
+my_test_code-0.0.1+1741187253123456700.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+my_test_code-0.0.1+1741187253123456700.dist-info/entry_points.txt,sha256=oDWOW9SsBlk4Uejj1ftYPBxfhJ5ZJctb4JOUIG1rc-4,34
+my_test_code-0.0.1+1741187253123456700.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
+my_test_code-0.0.1+1741187253123456700.dist-info/RECORD,,

View File

@ -0,0 +1,9 @@
Metadata-Version: 2.1
Name: my-test-code
Version: 0.0.1+1741187253123456700
Summary: my test wheel
Home-page: https://databricks.com
Author: Databricks
Author-email: john.doe@databricks.com
Requires-Dist: setuptools

View File

@ -0,0 +1,7 @@
src/__init__.py,sha256=BRmKeYehopKv4NG_SFa7t6wn248RrPHJivu7DM1R-Rw,48
src/__main__.py,sha256=8TtsnLsaJEM35Y4L8ocrv-qfxusgYpRL2HPyYiabHng,242
my_test_code-0.0.1+1741187253123456700.dist-info/METADATA,sha256=H99P2vEwB_hBVPNtPwsXZotaDQzmWEGeSlOtMzWe62U,217
my_test_code-0.0.1+1741187253123456700.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
my_test_code-0.0.1+1741187253123456700.dist-info/entry_points.txt,sha256=oDWOW9SsBlk4Uejj1ftYPBxfhJ5ZJctb4JOUIG1rc-4,34
my_test_code-0.0.1+1741187253123456700.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
my_test_code-0.0.1+1741187253123456700.dist-info/RECORD,,

View File

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.42.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@ -0,0 +1,2 @@
[group_1]
run = src.__main__:main

View File

@ -0,0 +1,2 @@
__version__ = "0.0.1"
__author__ = "Databricks"

View File

@ -0,0 +1,16 @@
"""
The entry point of the Python Wheel
"""
import sys
def main():
# This method will print the provided arguments
print('Hello from my func')
print('Got arguments:')
print(sys.argv)
if __name__ == '__main__':
main()

View File

@ -0,0 +1 @@
exclude = ["output"]

View File

@ -0,0 +1,14 @@
# Acceptance script: patch a prebuilt wheel and diff its unpacked contents against the original.
title "Test prebuilt wheel:"
# Pin the wheel's mtime first: the version suffix produced by patchwhl is derived
# from it, so a fixed timestamp keeps the traced output reproducible.
trace setmtime.py "2025-03-05 15:07:33.123456700" my_test_code-0.0.1-py3-none-any.whl
trace $CLI selftest patchwhl my_test_code-0.0.1-py3-none-any.whl
mkdir output original
# The patched wheel carries a "+<mtime>" suffix in its version, hence the glob.
unzip -q my_test_code-0.0.1+1*-py3-none-any.whl -d output
unzip -q my_test_code-0.0.1-py3-none-any.whl -d original
rm my_test_code-0.0.1+1*-py3-none-any.whl
# rename directory to match so that we can compare contents
mv original/my_test_code-0.0.1.dist-info original/my_test_code-0.0.1+1741187253123456700.dist-info
trace diff.py original output
# Only the scratch copy is removed; "output" stays behind
# (presumably so the harness can record it - TODO confirm).
rm -fr original

24
cmd/selftest/patchwhl.go Normal file
View File

@ -0,0 +1,24 @@
package selftest
import (
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/patchwheel"
"github.com/spf13/cobra"
)
// newPatchWhl builds the "patchwhl" subcommand. It runs patchwheel.PatchWheel
// on every argument, writing each patched wheel into the current directory.
// The outcome for each wheel is reported as a warning-level log line; a failure
// on one wheel does not stop processing of the remaining arguments.
func newPatchWhl() *cobra.Command {
	run := func(cmd *cobra.Command, args []string) {
		ctx := cmd.Context()
		for _, whl := range args {
			patched, err := patchwheel.PatchWheel(ctx, whl, ".")
			if err != nil {
				log.Warnf(ctx, "Failed to patch whl: %s: %s", whl, err)
				continue
			}
			log.Warnf(ctx, "Patched whl: %s -> %s", whl, patched)
		}
	}

	return &cobra.Command{
		Use: "patchwhl",
		Run: run,
	}
}

View File

@ -12,5 +12,6 @@ func New() *cobra.Command {
} }
cmd.AddCommand(newPanic()) cmd.AddCommand(newPanic())
cmd.AddCommand(newPatchWhl())
return cmd return cmd
} }

64
libs/patchwheel/parse.go Normal file
View File

@ -0,0 +1,64 @@
package patchwheel
import (
"fmt"
"strconv"
"strings"
"time"
)
// WheelInfo holds the dash-separated components of a wheel filename,
// as produced by ParseWheelFilename.
type WheelInfo struct {
Distribution string // First component: the package distribution name
Version string // Second component: the package version, including any local "+..." part
Tags []string // Remaining components in filename order, ".whl" removed: [optional build tag,] python_tag, abi_tag, platform_tag
}
// calculateNewVersion derives the patched version and the matching wheel filename.
//
// Everything from the first "+" in info.Version onwards is discarded and replaced
// by the modification time expressed in nanoseconds since the Unix epoch:
//
//	"1.2.3"       -> "1.2.3+1741091696780123321"
//	"1.2.3+local" -> "1.2.3+1741091696780123321"
//
// The filename is rebuilt as <distribution>-<newVersion>-<tags joined by "-">.whl.
func calculateNewVersion(info WheelInfo, mtime time.Time) (newVersion, newFilename string) {
	base := info.Version
	if plus := strings.IndexByte(base, '+'); plus >= 0 {
		base = base[:plus]
	}

	stamp := strconv.FormatInt(mtime.UnixNano(), 10)
	newVersion = fmt.Sprintf("%s+%s", base, stamp)
	newFilename = info.Distribution + "-" + newVersion + "-" + strings.Join(info.Tags, "-") + ".whl"
	return newVersion, newFilename
}
// ParseWheelFilename splits a wheel filename into its dash-separated components.
//
// Expected layout (https://peps.python.org/pep-0491):
//
//	{distribution}-{version}(-{build tag})?-{python_tag}-{abi_tag}-{platform_tag}.whl
//
// Validation is deliberately shallow: the name must split into 5 or 6
// components and the last one must end in ".whl". The individual components
// are returned as found, without further checks.
func ParseWheelFilename(filename string) (WheelInfo, error) {
	fields := strings.Split(filename, "-")

	switch n := len(fields); {
	case n < 5:
		return WheelInfo{}, fmt.Errorf("invalid wheel filename format: not enough parts in %s", filename)
	case n > 6:
		return WheelInfo{}, fmt.Errorf("invalid wheel filename format: too many parts in %s", filename)
	}

	last := len(fields) - 1
	if !strings.HasSuffix(fields[last], ".whl") {
		return WheelInfo{}, fmt.Errorf("invalid wheel filename format: missing .whl extension in %s", filename)
	}
	// Drop the extension so that the final tag is stored clean.
	fields[last] = strings.TrimSuffix(fields[last], ".whl")

	info := WheelInfo{
		Distribution: fields[0],
		Version:      fields[1],
		Tags:         fields[2:],
	}
	return info, nil
}

View File

@ -0,0 +1,297 @@
package patchwheel
import (
"testing"
"time"
"github.com/stretchr/testify/require"
)
// TestCalculateNewVersion checks the version and filename produced for a plain
// version, a version that already has a local "+..." part, and a distribution
// name that contains dashes.
func TestCalculateNewVersion(t *testing.T) {
	// All cases share the same wall-clock second; only the nanoseconds vary.
	at := func(nsec int) time.Time {
		return time.Date(2025, 3, 4, 12, 34, 56, nsec, time.UTC)
	}
	pureTags := []string{"py3", "none", "any"}

	cases := []struct {
		name         string
		info         WheelInfo
		mtime        time.Time
		wantVersion  string
		wantFilename string
	}{
		{
			name:         "basic version",
			info:         WheelInfo{Distribution: "mypkg", Version: "1.2.3", Tags: pureTags},
			mtime:        at(780_123_321),
			wantVersion:  "1.2.3+1741091696780123321",
			wantFilename: "mypkg-1.2.3+1741091696780123321-py3-none-any.whl",
		},
		{
			name:         "existing plus version",
			info:         WheelInfo{Distribution: "mypkg", Version: "1.2.3+local", Tags: pureTags},
			mtime:        at(100_000_005),
			wantVersion:  "1.2.3+1741091696100000005",
			wantFilename: "mypkg-1.2.3+1741091696100000005-py3-none-any.whl",
		},
		{
			name:         "complex distribution name",
			info:         WheelInfo{Distribution: "my-pkg-name", Version: "1.2.3", Tags: pureTags},
			mtime:        at(0),
			wantVersion:  "1.2.3+1741091696000000000",
			wantFilename: "my-pkg-name-1.2.3+1741091696000000000-py3-none-any.whl",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotVersion, gotFilename := calculateNewVersion(tc.info, tc.mtime)
			if gotVersion != tc.wantVersion {
				t.Errorf("expected version %s, got %s", tc.wantVersion, gotVersion)
			}
			if gotFilename != tc.wantFilename {
				t.Errorf("expected filename %s, got %s", tc.wantFilename, gotFilename)
			}
		})
	}
}
// TestParseWheelFilename runs ParseWheelFilename over a table of valid wheel
// names (with and without a build tag) and two names that must be rejected.
func TestParseWheelFilename(t *testing.T) {
	type testCase struct {
		filename string
		dist     string
		version  string
		tags     []string
		wantErr  bool
	}
	// ok describes a filename that must parse into the given components.
	ok := func(filename, dist, version string, tags ...string) testCase {
		return testCase{filename: filename, dist: dist, version: version, tags: tags}
	}
	// bad describes a filename that must be rejected.
	bad := func(filename string) testCase {
		return testCase{filename: filename, wantErr: true}
	}

	const macosxMulti = "macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64"

	cases := []testCase{
		ok("myproj-0.1.0-py3-none-any.whl", "myproj", "0.1.0", "py3", "none", "any"),
		ok("myproj-0.1.0+20240303123456-py3-none-any.whl", "myproj", "0.1.0+20240303123456", "py3", "none", "any"),
		ok("my_proj_with_parts-0.1.0-py3-none-any.whl", "my_proj_with_parts", "0.1.0", "py3", "none", "any"),

		// Test cases adapted from wheelodex/wheel-filename
		// https://github.com/wheelodex/wheel-filename/blob/f5a72b560d016cc9663c8b5a094c96dc338a2209/test/test_parse.py
		// MIT License: https://github.com/wheelodex/wheel-filename/blob/f5a72b560d016cc9663c8b5a094c96dc338a2209/LICENSE
		ok("astrocats-0.3.2-universal-none-any.whl", "astrocats", "0.3.2", "universal", "none", "any"),
		ok("bencoder.pyx-1.1.2-pp226-pp226-win32.whl", "bencoder.pyx", "1.1.2", "pp226", "pp226", "win32"),
		ok("brotlipy-0.1.2-pp27-none-macosx_10_10_x86_64.whl", "brotlipy", "0.1.2", "pp27", "none", "macosx_10_10_x86_64"),
		ok("brotlipy-0.3.0-pp226-pp226u-macosx_10_10_x86_64.whl", "brotlipy", "0.3.0", "pp226", "pp226u", "macosx_10_10_x86_64"),
		ok("carbonara_archinfo-7.7.9.14.post1-py2-none-any.whl", "carbonara_archinfo", "7.7.9.14.post1", "py2", "none", "any"),
		ok("coremltools-0.3.0-py2.7-none-any.whl", "coremltools", "0.3.0", "py2.7", "none", "any"),
		ok("cvxopt-1.2.0-001-cp34-cp34m-"+macosxMulti+".whl", "cvxopt", "1.2.0", "001", "cp34", "cp34m", macosxMulti),
		ok("django_mbrowse-0.0.1-10-py2-none-any.whl", "django_mbrowse", "0.0.1", "10", "py2", "none", "any"),
		ok("efilter-1!1.2-py2-none-any.whl", "efilter", "1!1.2", "py2", "none", "any"),
		ok("line.sep-0.2.0.dev1-py2.py3-none-any.whl", "line.sep", "0.2.0.dev1", "py2.py3", "none", "any"),
		ok("mayan_edms-1.1.0-1502100955-py2-none-any.whl", "mayan_edms", "1.1.0", "1502100955", "py2", "none", "any"),
		ok("mxnet_model_server-1.0a5-20180816-py2.py3-none-any.whl", "mxnet_model_server", "1.0a5", "20180816", "py2.py3", "none", "any"),
		ok("pip-18.0-py2.py3-none-any.whl", "pip", "18.0", "py2.py3", "none", "any"),
		ok("polarTransform-2-1.0.0-py3-none-any.whl", "polarTransform", "2", "1.0.0", "py3", "none", "any"),
		ok("psycopg2-2.7.5-cp37-cp37m-"+macosxMulti+".whl", "psycopg2", "2.7.5", "cp37", "cp37m", macosxMulti),
		ok("pyinterval-1.0.0-0-cp27-none-win32.whl", "pyinterval", "1.0.0", "0", "cp27", "none", "win32"),
		ok("pypi_simple-0.1.0.dev1-py2.py3-none-any.whl", "pypi_simple", "0.1.0.dev1", "py2.py3", "none", "any"),
		ok("PyQt3D-5.7.1-5.7.1-cp34.cp35.cp36-abi3-macosx_10_6_intel.whl", "PyQt3D", "5.7.1", "5.7.1", "cp34.cp35.cp36", "abi3", "macosx_10_6_intel"),
		ok("qypi-0.4.1-py3-none-any.whl", "qypi", "0.4.1", "py3", "none", "any"),
		ok("SimpleSteem-1.1.9-3.0-none-any.whl", "SimpleSteem", "1.1.9", "3.0", "none", "any"),
		ok("simple_workflow-0.1.47-pypy-none-any.whl", "simple_workflow", "0.1.47", "pypy", "none", "any"),
		ok("tables-3.4.2-3-cp27-cp27m-manylinux1_i686.whl", "tables", "3.4.2", "3", "cp27", "cp27m", "manylinux1_i686"),

		bad("invalid-filename.txt"),
		bad("not-enough-parts-py3.whl"),
	}

	for _, tc := range cases {
		t.Run(tc.filename, func(t *testing.T) {
			info, err := ParseWheelFilename(tc.filename)
			if tc.wantErr {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.Equal(t, tc.dist, info.Distribution, "distribution mismatch")
			require.Equal(t, tc.version, info.Version, "version mismatch")
			require.Equal(t, tc.tags, info.Tags, "tags mismatch")
		})
	}
}
// TestParseError feeds ParseWheelFilename names with too few or too many
// components (including the empty string) and expects an error for each.
func TestParseError(t *testing.T) {
	invalid := []string{
		"",
		"a-b",
		"a-b-c.whl",
		"a-b-c-d.whl",
		"a-b-c-d-e-f-g.whl",
	}

	for _, filename := range invalid {
		t.Run(filename, func(t *testing.T) {
			info, err := ParseWheelFilename(filename)
			require.Error(t, err, "info=%v", info)
		})
	}
}

268
libs/patchwheel/patch.go Normal file
View File

@ -0,0 +1,268 @@
package patchwheel
import (
"archive/zip"
"bufio"
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/databricks/cli/libs/log"
)
// versionKey is the METADATA field name (with its colon) whose value patchMetadata rewrites.
var versionKey = []byte("Version:")
// findFile returns the archive entry whose name is exactly filename,
// or nil when the archive has no such entry.
func findFile(r *zip.ReadCloser, filename string) *zip.File {
	for i := range r.File {
		if entry := r.File[i]; entry.Name == filename {
			return entry
		}
	}
	return nil
}
// patchMetadata returns new METADATA content in which the "Version:" header
// carries newVersion instead of oldVersion.
//
// METADATA has an email-like layout: header fields first, then optionally a
// blank line followed by a free-form body (the long description). Only the
// header section is inspected, so a body line that happens to start with
// "Version:" is copied through untouched.
//
// Line endings are normalised to "\n" and the result always ends with a newline.
//
// An error is returned when reading fails, when a Version header holds
// something other than oldVersion, or when the headers contain no Version
// field at all (the wheel would otherwise be left inconsistent).
func patchMetadata(r io.Reader, oldVersion, newVersion string) ([]byte, error) {
	// ReadBytes has no line-length limit. bufio.Scanner would fail with
	// ErrTooLong on description lines above its default 64 KiB token size.
	reader := bufio.NewReader(r)
	var buf bytes.Buffer
	inHeaders := true
	patched := false

	for {
		raw, readErr := reader.ReadBytes('\n')
		if readErr != nil && readErr != io.EOF {
			return nil, readErr
		}

		// At EOF raw is non-empty only when the last line has no trailing newline.
		if len(raw) > 0 {
			line := bytes.TrimSuffix(raw, []byte("\n"))
			line = bytes.TrimSuffix(line, []byte("\r"))

			if inHeaders && len(line) == 0 {
				// The first blank line separates the headers from the body.
				inHeaders = false
			}

			versionValue, isVersion := bytes.CutPrefix(line, versionKey)
			if inHeaders && isVersion {
				foundVersion := string(bytes.TrimSpace(versionValue))
				if foundVersion != oldVersion {
					return nil, fmt.Errorf("Unexpected version in METADATA: %s (expected %s)", strings.TrimSpace(string(line)), oldVersion)
				}
				buf.WriteString(string(versionKey) + " " + newVersion + "\n")
				patched = true
			} else {
				buf.Write(line)
				buf.WriteString("\n")
			}
		}

		if readErr == io.EOF {
			break
		}
	}

	if !patched {
		return nil, fmt.Errorf("no Version field found in METADATA headers (expected %s)", oldVersion)
	}
	return buf.Bytes(), nil
}
// patchRecord rewrites RECORD for the renamed dist-info directory.
//
// For every non-blank line with at least three comma-separated fields, a path
// that starts with oldDistInfoPrefix gets that prefix swapped for
// newDistInfoPrefix. The entry whose resulting path is
// <newDistInfoPrefix>METADATA additionally receives the supplied hash (stored
// as "sha256=<metadataHash>") and size. Blank lines are dropped; lines with
// fewer than three fields are passed through unchanged.
func patchRecord(r io.Reader, oldDistInfoPrefix, newDistInfoPrefix, metadataHash string, metadataSize int) ([]byte, error) {
	var out bytes.Buffer
	metadataEntry := newDistInfoPrefix + "METADATA"

	lines := bufio.NewScanner(r)
	for lines.Scan() {
		raw := lines.Bytes()
		if len(bytes.TrimSpace(raw)) == 0 {
			continue
		}

		fields := strings.Split(string(raw), ",")
		if len(fields) >= 3 {
			if rest, ok := strings.CutPrefix(fields[0], oldDistInfoPrefix); ok {
				fields[0] = newDistInfoPrefix + rest
			}
			if fields[0] == metadataEntry {
				fields[1] = "sha256=" + metadataHash
				fields[2] = strconv.Itoa(metadataSize)
			}
		}

		// Re-joining untouched fields reproduces a short line exactly as read.
		out.WriteString(strings.Join(fields, ","))
		out.WriteByte('\n')
	}
	if err := lines.Err(); err != nil {
		return nil, err
	}

	return out.Bytes(), nil
}
// PatchWheel reads an existing wheel file path and outputs a new one in outputDir,
// with a version modified according to the following rules:
//   - if there is an existing part after + it is dropped
//   - append +<mtime of the original wheel> to version
//
// All parts of wheel are modified to ensure the wheel is in correct format:
//   - METADATA: Version field is updated
//   - RECORD: METADATA entry is updated with correct hash and size
//   - <dist>-<version>.dist-info directory is renamed to <dist>-<newVersion>.dist-info
//
// Every other archive entry is copied with its name, compression method,
// modification time, comment and permission attributes preserved.
//
// The function is idempotent: repeated calls with the same input will produce the same output.
// If the target wheel already exists, it returns the path to the existing wheel without redoing the patching.
//
// The wheel is first written to a temporary file next to the target and renamed
// into place only after it has been written and closed successfully. On any
// error the temporary file is removed and an empty path is returned.
func PatchWheel(ctx context.Context, path, outputDir string) (string, error) {
	fileInfo, err := os.Stat(path)
	if err != nil {
		return "", err
	}
	wheelMtime := fileInfo.ModTime().UTC()

	filename := filepath.Base(path)
	wheelInfo, err := ParseWheelFilename(filename)
	if err != nil {
		return "", err
	}

	newVersion, newFilename := calculateNewVersion(wheelInfo, wheelMtime)
	outpath := filepath.Join(outputDir, newFilename)

	if _, err := os.Stat(outpath); err == nil {
		log.Debugf(ctx, "Skipping patching of %s, already exists: %s", path, outpath)
		return outpath, nil
	}

	// The PID suffix keeps concurrent processes from writing to the same temporary file.
	tmpFilename := outpath + fmt.Sprintf(".tmp%d", os.Getpid())
	needRemoval := true
	defer func() {
		if needRemoval {
			// Best-effort cleanup; the file may not even exist yet.
			_ = os.Remove(tmpFilename)
		}
	}()

	r, err := zip.OpenReader(path)
	if err != nil {
		return "", err
	}
	defer r.Close()

	oldDistInfoPrefix := wheelInfo.Distribution + "-" + wheelInfo.Version + ".dist-info/"
	metadataFile := findFile(r, oldDistInfoPrefix+"METADATA")
	if metadataFile == nil {
		return "", fmt.Errorf("wheel %s missing %sMETADATA", path, oldDistInfoPrefix)
	}

	recordFile := findFile(r, oldDistInfoPrefix+"RECORD")
	if recordFile == nil {
		return "", fmt.Errorf("wheel %s missing %sRECORD file", path, oldDistInfoPrefix)
	}

	metadataReader, err := metadataFile.Open()
	if err != nil {
		return "", err
	}
	defer metadataReader.Close()

	newMetadata, err := patchMetadata(metadataReader, wheelInfo.Version, newVersion)
	if err != nil {
		return "", err
	}

	// RECORD stores hashes as unpadded URL-safe base64 of the SHA-256 digest.
	sum := sha256.Sum256(newMetadata)
	metadataHash := base64.RawURLEncoding.EncodeToString(sum[:])
	metadataSize := len(newMetadata)

	// Compute the new dist-info directory prefix.
	newDistInfoPrefix := strings.Replace(oldDistInfoPrefix, wheelInfo.Version, newVersion, 1)
	if newDistInfoPrefix == oldDistInfoPrefix {
		return "", fmt.Errorf("unexpected dist-info directory format: %s (version=%s)", oldDistInfoPrefix, wheelInfo.Version)
	}

	recordReader, err := recordFile.Open()
	if err != nil {
		return "", err
	}
	defer recordReader.Close()

	newRecord, err := patchRecord(recordReader, oldDistInfoPrefix, newDistInfoPrefix, metadataHash, metadataSize)
	if err != nil {
		return "", err
	}

	outFile, err := os.Create(tmpFilename)
	if err != nil {
		return "", err
	}
	// Safety net for the early returns below. The success path closes the file
	// explicitly and checks the error; the extra Close here is then a harmless no-op error.
	defer outFile.Close()

	metadataUpdated := 0
	recordUpdated := 0

	zipw := zip.NewWriter(outFile)
	for _, f := range r.File {
		// If the file is inside the old dist-info directory, update its name.
		newName := f.Name
		if strings.HasPrefix(f.Name, oldDistInfoPrefix) {
			newName = newDistInfoPrefix + f.Name[len(oldDistInfoPrefix):]
		}

		header := &zip.FileHeader{
			Name:   newName,
			Method: f.Method,
			// Preserve the permission bits together with the creator OS byte that
			// tells readers how to interpret them; otherwise e.g. executable
			// files in the wheel lose their mode.
			CreatorVersion: f.CreatorVersion,
			ExternalAttrs:  f.ExternalAttrs,
		}

		header.Modified = f.ModTime()
		header.Comment = f.Comment

		if f.FileInfo().IsDir() && !strings.HasSuffix(header.Name, "/") {
			header.Name += "/"
		}

		writer, err := zipw.CreateHeader(header)
		if err != nil {
			return "", err
		}

		switch f.Name {
		case metadataFile.Name:
			if _, err := writer.Write(newMetadata); err != nil {
				return "", err
			}
			metadataUpdated++
		case recordFile.Name:
			if _, err := writer.Write(newRecord); err != nil {
				return "", err
			}
			recordUpdated++
		default:
			rc, err := f.Open()
			if err != nil {
				return "", err
			}
			_, err = io.Copy(writer, rc)
			if err != nil {
				rc.Close()
				return "", err
			}
			if err := rc.Close(); err != nil {
				return "", err
			}
		}
	}

	if err := zipw.Close(); err != nil {
		return "", err
	}

	// A failed Close can mean buffered data never reached the disk, so it must
	// prevent the rename below from publishing a truncated wheel.
	if err := outFile.Close(); err != nil {
		return "", err
	}

	if metadataUpdated != 1 {
		return "", errors.New("Could not update METADATA")
	}

	if recordUpdated != 1 {
		return "", errors.New("Could not update RECORD")
	}

	if err := os.Rename(tmpFilename, outpath); err != nil {
		return "", err
	}

	needRemoval = false
	return outpath, nil
}

View File

@ -0,0 +1,218 @@
package patchwheel
import (
"bytes"
"context"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Shared fixtures for the tests in this file.
var (
// Name of the executables directory inside a virtualenv ("Scripts" on Windows, "bin" elsewhere).
scriptsDir = getPythonScriptsDir()
// Prebuilt wheel fixture; TestError uses it as the input for the missing-output-directory case.
prebuiltWheel = "testdata/my_test_code-0.0.1-py3-none-any.whl"
// Fixture that TestEmptyZip passes to PatchWheel, expecting an error.
emptyZip = "testdata/empty.zip"
)
// getPythonScriptsDir returns the name of the directory inside a virtualenv
// that holds its executables: "Scripts" on Windows, "bin" everywhere else.
func getPythonScriptsDir() string {
	switch runtime.GOOS {
	case "windows":
		return "Scripts"
	default:
		return "bin"
	}
}
// getPythonVersions lists the interpreter names that TestPatchWheel creates
// environments for.
func getPythonVersions() []string {
	supported := [...]string{
		"python3.9",
		"python3.10",
		"python3.11",
		"python3.12",
		"python3.13",
	}
	return supported[:]
}
// verifyVersion asserts that the "myproj" package installed in tempDir's .venv
// reports the version encoded in the filename of the wheel at wheelPath.
func verifyVersion(t *testing.T, tempDir, wheelPath string) {
	// ParseWheelFilename splits on "-" and expects a bare filename. Passing the
	// full path would misparse as soon as a parent directory contains a hyphen.
	wheelInfo, err := ParseWheelFilename(filepath.Base(wheelPath))
	require.NoError(t, err)
	expectedVersion := wheelInfo.Version

	pyExec := filepath.Join(tempDir, ".venv", scriptsDir, "python")
	cmdOut := captureOutput(t, tempDir, pyExec, "-c", "import myproj; myproj.print_version()")
	actualVersion := strings.TrimSpace(cmdOut)
	t.Logf("Verified installed version: %s", actualVersion)
	assert.Equal(t, expectedVersion, actualVersion, "Installed version doesn't match expected version from wheel filename")
}
// minimalPythonProject returns the files of a tiny setuptools project named
// "myproj" (version 0.1.0), keyed by path relative to the project root.
// The package exposes print_version(), which prints the installed version as
// reported by importlib.metadata; verifyVersion relies on it.
func minimalPythonProject() map[string]string {
return map[string]string{
"pyproject.toml": `[project]
name = "myproj"
version = "0.1.0"
[build-system]
requires = ["setuptools>=61.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools.packages.find]
where = ["src"]
`,
"src/myproj/__init__.py": `
def hello():
return "Hello, world!"
def print_version():
from importlib.metadata import version
print(version("myproj"))
`,
}
}
// writeProjectFiles materialises files (relative path -> content) under baseDir,
// creating parent directories as needed. Any filesystem error fails the test
// immediately.
func writeProjectFiles(t *testing.T, baseDir string, files map[string]string) {
	for relPath, content := range files {
		target := filepath.Join(baseDir, relPath)
		parent := filepath.Dir(target)

		err := os.MkdirAll(parent, 0o755)
		if err != nil {
			t.Fatalf("Failed to create directory %s: %v", parent, err)
		}

		err = os.WriteFile(target, []byte(content), 0o644)
		if err != nil {
			t.Fatalf("Failed to write file %s: %v", target, err)
		}
	}
}
// runCmd executes a command that is expected to stay silent. Any output it
// produces is reported as a test error; a failing command aborts the test
// inside captureOutput.
func runCmd(t *testing.T, dir, name string, args ...string) {
	if output := captureOutput(t, dir, name, args...); len(output) > 0 {
		t.Errorf("Output from %s %s:\n%s", name, args, output)
	}
}
// captureOutput runs name with args in dir and returns its combined
// stdout and stderr. If the command fails, the command line and the collected
// output are logged and the test is aborted.
func captureOutput(t *testing.T, dir, name string, args ...string) string {
	var combined bytes.Buffer

	cmd := exec.Command(name, args...)
	cmd.Dir = dir
	cmd.Stdout, cmd.Stderr = &combined, &combined

	if err := cmd.Run(); err != nil {
		t.Logf("Command failed: %s %s", name, strings.Join(args, " "))
		t.Logf("Output:\n%s", combined.String())
		t.Fatal(err)
	}
	return combined.String()
}
// getWheel returns the single *.whl file located directly in dir and fails
// the test when there is none or more than one.
func getWheel(t *testing.T, dir string) string {
	pattern := filepath.Join(dir, "*.whl")
	wheels, err := filepath.Glob(pattern)
	if err != nil {
		t.Fatalf("Error matching pattern %s: %v", pattern, err)
	}

	switch len(wheels) {
	case 0:
		t.Fatalf("No files found matching %s", pattern)
	case 1:
		return wheels[0]
	default:
		t.Fatalf("Too many matches %s: %v", pattern, wheels)
	}
	// Not reached: t.Fatalf stops the test goroutine.
	return ""
}
// TestPatchWheel is an end-to-end check that runs once per interpreter from
// getPythonVersions. It builds a minimal project with uv, patches the resulting
// wheel, verifies that a second PatchWheel call reuses the existing output, and
// installs the patched wheel first through "uv pip" and then, after bumping the
// original wheel's mtime, through regular pip.
func TestPatchWheel(t *testing.T) {
pythonVersions := getPythonVersions()
for _, py := range pythonVersions {
t.Run(py, func(t *testing.T) {
t.Parallel()
tempDir := t.TempDir()
projFiles := minimalPythonProject()
writeProjectFiles(t, tempDir, projFiles)
// runCmd reports any output as an error, hence the -q flags throughout.
runCmd(t, tempDir, "uv", "venv", "-q", "--python", py)
runCmd(t, tempDir, "uv", "build", "-q", "--wheel")
distDir := filepath.Join(tempDir, "dist")
origWheel := getWheel(t, distDir)
patchedWheel, err := PatchWheel(context.Background(), origWheel, distDir)
require.NoError(t, err)
patchedInfo, err := os.Stat(patchedWheel)
require.NoError(t, err)
patchedTime := patchedInfo.ModTime()
// Test idempotency - patching the same wheel again should produce the same result
// and should not recreate the file (file modification time should remain the same)
patchedWheel2, err := PatchWheel(context.Background(), origWheel, distDir)
require.NoError(t, err)
require.Equal(t, patchedWheel, patchedWheel2, "PatchWheel is not idempotent")
patchedInfo2, err := os.Stat(patchedWheel2)
require.NoError(t, err)
require.Equal(t, patchedTime, patchedInfo2.ModTime(), "File was recreated when it shouldn't have been")
runCmd(t, tempDir, "uv", "pip", "install", "-q", patchedWheel)
verifyVersion(t, tempDir, patchedWheel)
// Give the original wheel a later mtime so that the next patch yields a newer version suffix.
newTime := patchedInfo.ModTime().Add(10 * time.Millisecond)
err = os.Chtimes(origWheel, newTime, newTime)
require.NoError(t, err)
patchedWheel3, err := PatchWheel(context.Background(), origWheel, distDir)
require.NoError(t, err)
// The two paths are compared as strings; they presumably differ only in the mtime suffix.
require.Greater(t, patchedWheel3, patchedWheel)
// Now use regular pip to re-install the wheel. First install pip.
runCmd(t, tempDir, "uv", "pip", "install", "-q", "pip")
// Relative path: runCmd executes with tempDir as the working directory.
pippath := filepath.Join(".venv", getPythonScriptsDir(), "pip")
runCmd(t, tempDir, pippath, "install", "-q", patchedWheel3)
verifyVersion(t, tempDir, patchedWheel3)
})
}
}
// errPatchWheel asserts that PatchWheel(name, out) fails and returns an empty
// output path.
func errPatchWheel(t *testing.T, name, out string) {
	outname, err := PatchWheel(context.Background(), name, out)
	assert.Error(t, err, "PatchWheel(%s, %s) expected to error", name, out)
	assert.Empty(t, outname)
}
func TestError(t *testing.T) {
// empty name and dir
errPatchWheel(t, "", "")
// empty name
errPatchWheel(t, "", ".")
// file not found
errPatchWheel(t, "not-found.txt", ".")
// output directory not found
errPatchWheel(t, prebuiltWheel, "not-found/a/b/c")
}
// TestEmptyZip checks that PatchWheel fails on the empty.zip fixture.
func TestEmptyZip(t *testing.T) {
	errPatchWheel(t, emptyZip, t.TempDir())
}
// TestNonZip checks that PatchWheel fails when given an existing file that is
// not a zip archive (this package's own patch.go).
func TestNonZip(t *testing.T) {
	const notAZip = "patch.go"

	outDir := t.TempDir()

	// Guard against the test passing merely because the input is missing.
	_, statErr := os.Stat(notAZip)
	require.NoError(t, statErr, "file must exist for this test")

	errPatchWheel(t, notAZip, outDir)
}

22
libs/patchwheel/pkg.go Normal file
View File

@ -0,0 +1,22 @@
/*
Package patchwheel patches a whl file with a dynamic version suffix.

When developing a DAB, users want to redeploy a wheel without manually updating the version in pyproject.toml / setup.py.
However, installing the same version with pip causes pip to skip the install. Databricks envs follow this behaviour.
For this reason, we've modified the default-python template to auto-update the version: https://github.com/databricks/cli/pull/1034
However, that ties the template to setup.py / setuptools and puts the onus on users to keep this behaviour.
This package removes the constraint on how the wheel is built and allows adding the dynamic part as a post-build step.

PatchWheel(ctx, path, outputDir) takes an existing whl file and creates a new patched one with a version that includes
the mtime of the original wheel as a suffix.
METADATA, directory names, and RECORD are all updated to ensure the correct format.

ParseWheelFilename(filename) extracts the version from the filename, according to WHL format rules.
*/
package patchwheel

BIN
libs/patchwheel/testdata/empty.zip vendored Normal file

Binary file not shown.