diff --git a/python/env.go b/python/env.go index 8aec6131..2032c5b1 100644 --- a/python/env.go +++ b/python/env.go @@ -58,6 +58,8 @@ func DependencyFromSpec(raw string) (d Dependency) { return } +// Distribution holds part of PEP426 metadata +// See https://peps.python.org/pep-0426/ type Distribution struct { Name string `json:"name"` Version string `json:"version"` @@ -73,6 +75,15 @@ func (d Distribution) InstallEnvironment() (env Environment) { return } +// NormalizedName returns PEP503-compatible Python Package Index project name. +// As per PEP 426 the only valid characters in a name are the ASCII alphabet, +// ASCII numbers, ., -, and _. The name should be lowercased with all runs of +// the characters ., -, or _ replaced with a single - character. +func (d Distribution) NormalizedName() string { + // TODO: implement https://peps.python.org/pep-0503/#normalized-names + return d.Name +} + // ReadDistribution "parses" metadata from setup.py file. func ReadDistribution(ctx context.Context) (d Distribution, err error) { out, err := PyInline(ctx, ` diff --git a/python/wheel.go b/python/wheel.go index 2870d270..94cb6d72 100644 --- a/python/wheel.go +++ b/python/wheel.go @@ -3,10 +3,14 @@ package python import ( "context" "fmt" + "io" "log" "os" "path" "strings" + + "github.com/databricks/bricks/project" + "github.com/databrickslabs/terraform-provider-databricks/storage" ) func BuildWheel(ctx context.Context, dir string) (string, error) { @@ -33,6 +37,51 @@ func BuildWheel(ctx context.Context, dir string) (string, error) { return path.Join(dir, wheel), nil } +const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple" + +// TODO: research deeper if we make new data resource for terraform, like `databricks_latest_wheel` (preferred), +// or do we bypass the environment variable into terraform deployer. And make a decision. +// +// Whatever this method gets refactored to is intended to be used for two purposes: +// - uploading project's wheel archives: one per project or one per project/developer, depending on isolation +// - synchronising enterprise artifactories, jfrogs, azdo feeds, so that we fix the gap of private code artifact +// repository integration. +func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error) { + wheel, err := BuildWheel(ctx, dir) + if err != nil { + return "", err + } + defer chdirAndBack(dir)() + dist, err := ReadDistribution(ctx) + if err != nil { + return "", err + } + // TODO: figure out wheel naming criteria for Soft project isolation to allow multiple + // people workin on the same project to upload wheels and let them be deployed as independent jobs. + // we should also consider multiple PEP503 index stacking: per enterprise, per project, per developer. + // PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates + // a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/.. + // extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035 + dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel)) + dbfs := storage.NewDbfsAPI(ctx, project.Current.Client()) // circular dep?.. + wf, err := os.Open(wheel) + if err != nil { + return "", err + } + defer wf.Close() + raw, err := io.ReadAll(wf) + if err != nil { + return "", err + } + err = dbfs.Create(dbfsLoc, raw, true) + if err != nil { + return "", err + } + // TODO: maintain PEP503 compliance and update meta-files: + // ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html + return dbfsLoc, nil +} + func silentlyCleanupWheelFolder(dir string) { // there or not there - we don't care os.RemoveAll(path.Join(dir, "__pycache__"))