mirror of https://github.com/databricks/cli.git
upload wheel to pep503-compatible DBFS path
This commit is contained in:
parent
82438b9f1c
commit
0d76ab3d28
|
@ -58,6 +58,8 @@ func DependencyFromSpec(raw string) (d Dependency) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Distribution holds part of PEP426 metadata
|
||||||
|
// See https://peps.python.org/pep-0426/
|
||||||
type Distribution struct {
|
type Distribution struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
|
@ -73,6 +75,15 @@ func (d Distribution) InstallEnvironment() (env Environment) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NormalizedName returns PEP503-compatible Python Package Index project name.
|
||||||
|
// As per PEP 426 the only valid characters in a name are the ASCII alphabet,
|
||||||
|
// ASCII numbers, ., -, and _. The name should be lowercased with all runs of
|
||||||
|
// the characters ., -, or _ replaced with a single - character.
|
||||||
|
func (d Distribution) NormalizedName() string {
|
||||||
|
// TODO: implement https://peps.python.org/pep-0503/#normalized-names
|
||||||
|
return d.Name
|
||||||
|
}
|
||||||
|
|
||||||
// ReadDistribution "parses" metadata from setup.py file.
|
// ReadDistribution "parses" metadata from setup.py file.
|
||||||
func ReadDistribution(ctx context.Context) (d Distribution, err error) {
|
func ReadDistribution(ctx context.Context) (d Distribution, err error) {
|
||||||
out, err := PyInline(ctx, `
|
out, err := PyInline(ctx, `
|
||||||
|
|
|
@ -3,10 +3,14 @@ package python
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/databricks/bricks/project"
|
||||||
|
"github.com/databrickslabs/terraform-provider-databricks/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
func BuildWheel(ctx context.Context, dir string) (string, error) {
|
func BuildWheel(ctx context.Context, dir string) (string, error) {
|
||||||
|
@ -33,6 +37,51 @@ func BuildWheel(ctx context.Context, dir string) (string, error) {
|
||||||
return path.Join(dir, wheel), nil
|
return path.Join(dir, wheel), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DBFSWheelLocation is the DBFS base path used as the prefix of every wheel
// location built by UploadWheelToDBFSWithPEP503.
const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple"
|
||||||
|
|
||||||
|
// TODO: research deeper if we make new data resource for terraform, like `databricks_latest_wheel` (preferred),
|
||||||
|
// or do we bypass the environment variable into terraform deployer. And make a decision.
|
||||||
|
//
|
||||||
|
// Whatever this method gets refactored to is intended to be used for two purposes:
|
||||||
|
// - uploading project's wheel archives: one per project or one per project/developer, depending on isolation
|
||||||
|
// - synchronising enterprise artifactories, jfrogs, azdo feeds, so that we fix the gap of private code artifact
|
||||||
|
// repository integration.
|
||||||
|
func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error) {
|
||||||
|
wheel, err := BuildWheel(ctx, dir)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer chdirAndBack(dir)()
|
||||||
|
dist, err := ReadDistribution(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// TODO: figure out wheel naming criteria for Soft project isolation to allow multiple
|
||||||
|
// people workin on the same project to upload wheels and let them be deployed as independent jobs.
|
||||||
|
// we should also consider multiple PEP503 index stacking: per enterprise, per project, per developer.
|
||||||
|
// PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates
|
||||||
|
// a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/..
|
||||||
|
// extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035
|
||||||
|
dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel))
|
||||||
|
dbfs := storage.NewDbfsAPI(ctx, project.Current.Client()) // circular dep?..
|
||||||
|
wf, err := os.Open(wheel)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer wf.Close()
|
||||||
|
raw, err := io.ReadAll(wf)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
err = dbfs.Create(dbfsLoc, raw, true)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// TODO: maintain PEP503 compliance and update meta-files:
|
||||||
|
// ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html
|
||||||
|
return dbfsLoc, nil
|
||||||
|
}
|
||||||
|
|
||||||
func silentlyCleanupWheelFolder(dir string) {
|
func silentlyCleanupWheelFolder(dir string) {
|
||||||
// there or not there - we don't care
|
// there or not there - we don't care
|
||||||
os.RemoveAll(path.Join(dir, "__pycache__"))
|
os.RemoveAll(path.Join(dir, "__pycache__"))
|
||||||
|
|
Loading…
Reference in New Issue