2022-05-20 19:40:03 +00:00
|
|
|
package python
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2022-12-28 10:32:04 +00:00
|
|
|
"io"
|
2022-05-20 19:40:03 +00:00
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"strings"
|
2022-05-23 09:41:36 +00:00
|
|
|
|
2023-03-17 14:17:31 +00:00
|
|
|
"github.com/databricks/bricks/libs/log"
|
2022-05-23 09:41:36 +00:00
|
|
|
"github.com/databricks/bricks/project"
|
2022-12-28 10:32:04 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/service/dbfs"
|
2022-05-20 19:40:03 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func BuildWheel(ctx context.Context, dir string) (string, error) {
|
|
|
|
defer chdirAndBack(dir)()
|
|
|
|
// remove previous dist leak
|
|
|
|
os.RemoveAll("dist")
|
|
|
|
// remove all other irrelevant traces
|
|
|
|
silentlyCleanupWheelFolder(".")
|
|
|
|
// call simple wheel builder. we may need to pip install wheel as well
|
|
|
|
out, err := Py(ctx, "setup.py", "bdist_wheel")
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Debugf(ctx, "Built wheel: %s", out)
|
2022-05-20 19:40:03 +00:00
|
|
|
|
|
|
|
// and cleanup afterwards
|
|
|
|
silentlyCleanupWheelFolder(".")
|
|
|
|
|
|
|
|
wheel := silentChildWithSuffix("dist", ".whl")
|
|
|
|
if wheel == "" {
|
|
|
|
return "", fmt.Errorf("cannot find built wheel in %s", dir)
|
|
|
|
}
|
|
|
|
return path.Join(dir, wheel), nil
|
|
|
|
}
|
|
|
|
|
2022-05-23 09:41:36 +00:00
|
|
|
// DBFSWheelLocation is the DBFS directory under which uploaded wheels are
// placed, as <DBFSWheelLocation>/<normalized distribution name>/<wheel file>.
const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple"
|
|
|
|
|
|
|
|
// TODO: research deeper if we make new data resource for terraform, like `databricks_latest_wheel` (preferred),
|
|
|
|
// or do we bypass the environment variable into terraform deployer. And make a decision.
|
|
|
|
//
|
|
|
|
// Whatever this method gets refactored to is intended to be used for two purposes:
|
2022-09-07 13:15:23 +00:00
|
|
|
// - uploading project's wheel archives: one per project or one per project/developer, depending on isolation
|
|
|
|
// - synchronising enterprise artifactories, jfrogs, azdo feeds, so that we fix the gap of private code artifact
|
|
|
|
// repository integration.
|
2022-05-23 09:41:36 +00:00
|
|
|
func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error) {
|
|
|
|
wheel, err := BuildWheel(ctx, dir)
|
|
|
|
if err != nil {
|
2022-09-07 09:55:59 +00:00
|
|
|
return "", err
|
2022-05-23 09:41:36 +00:00
|
|
|
}
|
|
|
|
defer chdirAndBack(dir)()
|
|
|
|
dist, err := ReadDistribution(ctx)
|
|
|
|
if err != nil {
|
2022-09-07 09:55:59 +00:00
|
|
|
return "", err
|
2022-05-23 09:41:36 +00:00
|
|
|
}
|
2022-09-07 09:55:59 +00:00
|
|
|
// TODO: figure out wheel naming criteria for Soft project isolation to allow multiple
|
2022-05-23 09:41:36 +00:00
|
|
|
// people workin on the same project to upload wheels and let them be deployed as independent jobs.
|
|
|
|
// we should also consider multiple PEP503 index stacking: per enterprise, per project, per developer.
|
|
|
|
// PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates
|
|
|
|
// a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/..
|
|
|
|
// extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035
|
|
|
|
dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel))
|
2022-09-07 09:55:59 +00:00
|
|
|
|
2022-09-16 09:06:58 +00:00
|
|
|
wsc := project.Get(ctx).WorkspacesClient()
|
2022-05-23 09:41:36 +00:00
|
|
|
wf, err := os.Open(wheel)
|
|
|
|
if err != nil {
|
2022-09-07 09:55:59 +00:00
|
|
|
return "", err
|
2022-05-23 09:41:36 +00:00
|
|
|
}
|
|
|
|
defer wf.Close()
|
2022-12-28 10:32:04 +00:00
|
|
|
h, err := wsc.Dbfs.Open(ctx, dbfsLoc, dbfs.FileModeOverwrite|dbfs.FileModeWrite)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
_, err = io.Copy(h, wf)
|
2022-05-23 09:41:36 +00:00
|
|
|
// TODO: maintain PEP503 compliance and update meta-files:
|
|
|
|
// ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html
|
2022-09-07 09:55:59 +00:00
|
|
|
return dbfsLoc, err
|
2022-05-23 09:41:36 +00:00
|
|
|
}
|
|
|
|
|
2022-05-20 19:40:03 +00:00
|
|
|
func silentlyCleanupWheelFolder(dir string) {
|
|
|
|
// there or not there - we don't care
|
|
|
|
os.RemoveAll(path.Join(dir, "__pycache__"))
|
|
|
|
os.RemoveAll(path.Join(dir, "build"))
|
|
|
|
eggInfo := silentChildWithSuffix(dir, ".egg-info")
|
|
|
|
if eggInfo == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
os.RemoveAll(eggInfo)
|
|
|
|
}
|
|
|
|
|
|
|
|
func silentChildWithSuffix(dir, suffix string) string {
|
|
|
|
f, err := os.Open(dir)
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Debugf(context.Background(), "open dir %s: %s", dir, err)
|
2022-05-20 19:40:03 +00:00
|
|
|
return ""
|
|
|
|
}
|
|
|
|
entries, err := f.ReadDir(0)
|
|
|
|
if err != nil {
|
2023-03-17 14:17:31 +00:00
|
|
|
log.Debugf(context.Background(), "read dir %s: %s", dir, err)
|
2022-05-20 19:40:03 +00:00
|
|
|
// todo: log
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
for _, child := range entries {
|
|
|
|
if !strings.HasSuffix(child.Name(), suffix) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
return path.Join(dir, child.Name())
|
|
|
|
}
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// chdirAndBack changes the process working directory to dir and returns a
// function that changes it back to the directory that was current before the
// call. Typical use is `defer chdirAndBack(dir)()`.
//
// Errors from os.Getwd and os.Chdir are discarded: if dir cannot be entered
// the working directory is simply left as it was, and callers get no signal
// about it.
func chdirAndBack(dir string) func() {
	// remember where we are before moving
	previous, _ := os.Getwd()
	restore := func() {
		os.Chdir(previous)
	}
	os.Chdir(dir)
	return restore
}
|