mirror of https://github.com/databricks/cli.git
308 lines
8.7 KiB
Go
308 lines
8.7 KiB
Go
package py
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"golang.org/x/exp/slices"
|
|
|
|
"github.com/databricks/bricks/lib/dbr"
|
|
"github.com/databricks/bricks/lib/flavor"
|
|
"github.com/databricks/bricks/lib/spawn"
|
|
"github.com/databricks/bricks/python"
|
|
"github.com/databricks/databricks-sdk-go/databricks/apierr"
|
|
"github.com/databricks/databricks-sdk-go/service/commands"
|
|
"github.com/databricks/databricks-sdk-go/service/libraries"
|
|
)
|
|
|
|
// SetupDotPy is the project flavor for Python packages that are built
// through a setuptools setup.py file.
type SetupDotPy struct {
	// SetupPy is the location of the setup script relative to the project
	// root. When left empty, setupPyLoc fills it in with "setup.py".
	SetupPy string `json:"setup_py,omitempty"`

	// MirrorLibraries makes Prepare install the PyPI packages reported for
	// the development cluster into the local virtualenv.
	MirrorLibraries bool `json:"mirror_libraries,omitempty"`

	// venv is the virtualenv directory detected or created by Prepare;
	// Py resolves the interpreter relative to it.
	venv string

	// wheelName is recorded by LocalArtifacts and reported by Build.
	wheelName string
}
|
|
|
|
func (s *SetupDotPy) RequiresCluster() bool {
|
|
return true
|
|
}
|
|
|
|
// Python libraries do not require a restart
|
|
func (s *SetupDotPy) RequiresRestart() bool {
|
|
return false
|
|
}
|
|
|
|
func (s *SetupDotPy) setupPyLoc(prj flavor.Project) string {
|
|
if s.SetupPy == "" {
|
|
s.SetupPy = "setup.py"
|
|
}
|
|
return filepath.Join(prj.Root(), s.SetupPy)
|
|
}
|
|
|
|
// We detect only setuptools build backend for now. Hatchling, PDM,
|
|
// and Flit _might_ be added in some distant future.
|
|
//
|
|
// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/
|
|
func (s *SetupDotPy) Detected(prj flavor.Project) bool {
|
|
_, err := os.Stat(s.setupPyLoc(prj))
|
|
return err == nil
|
|
}
|
|
|
|
// readDistribution "parses" metadata from setup.py file from context
|
|
// of current project root and virtual env
|
|
func (s *SetupDotPy) readDistribution(ctx context.Context, prj flavor.Project) (d Distribution, err error) {
|
|
ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj)))
|
|
out, err := python.Py(ctx, "-c", commands.TrimLeadingWhitespace(`
|
|
import setuptools, json, sys
|
|
setup_config = {} # actual args for setuptools.dist.Distribution
|
|
def capture(**kwargs): global setup_config; setup_config = kwargs
|
|
setuptools.setup = capture
|
|
import setup
|
|
json.dump(setup_config, sys.stdout)`))
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = json.Unmarshal([]byte(out), &d)
|
|
return
|
|
}
|
|
|
|
func (s *SetupDotPy) Prepare(ctx context.Context, prj flavor.Project, status func(string)) error {
|
|
venv, err := s.fastDetectVirtualEnv(prj.Root())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.venv = venv
|
|
if s.venv == "" {
|
|
// this allows CLI to be usable in existing projects with existing virtualenvs
|
|
venv = filepath.Join(prj.Root(), ".databricks") // TODO: integrate with pipenv
|
|
err := os.MkdirAll(venv, 0o700)
|
|
if err != nil {
|
|
return fmt.Errorf("mk venv: %w", err)
|
|
}
|
|
status(fmt.Sprintf("Creating virtualenv in %s", venv))
|
|
_, err = python.Py(ctx, "-m", "venv", venv)
|
|
if err != nil {
|
|
return fmt.Errorf("create venv: %w", err)
|
|
}
|
|
s.venv = venv
|
|
status("Upgrading pip")
|
|
_, err = s.Pip(ctx, "install", "--upgrade", "pip", "wheel")
|
|
if err != nil {
|
|
return fmt.Errorf("upgrade pip: %w", err)
|
|
}
|
|
}
|
|
env, err := s.Freeze(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("pip freeze: %s", err)
|
|
}
|
|
var remotePkgs []string
|
|
d, err := s.readDistribution(ctx, prj)
|
|
if err != nil {
|
|
return fmt.Errorf("setup.py: %s", err)
|
|
}
|
|
if s.MirrorLibraries {
|
|
// TODO: name `MirrorLibraries` is TBD
|
|
// TODO: must be part of init command survey
|
|
status("Fetching remote libraries")
|
|
remoteInfo, err := s.runtimeInfo(ctx, prj, status)
|
|
if err != nil && errors.As(err, &apierr.APIError{}) {
|
|
return err
|
|
}
|
|
// TODO: check Spark compatibility with locall install_requires
|
|
// TODO: check Python version compatibility with local virtualenv
|
|
if err != errNoCluster {
|
|
skipLibs := []string{
|
|
"dbus-python",
|
|
"distro-info",
|
|
"pip",
|
|
"psycopg2",
|
|
"pygobject",
|
|
"python-apt",
|
|
"requests-unixsocket",
|
|
"setuptools",
|
|
"unattended-upgrades",
|
|
"wheel",
|
|
}
|
|
PYPI:
|
|
for _, pkg := range remoteInfo.PyPI {
|
|
if env.Has(pkg.PyPiName()) {
|
|
continue
|
|
}
|
|
if pkg.Name == d.NormalizedName() {
|
|
// skip installing self
|
|
continue
|
|
}
|
|
for _, skip := range skipLibs {
|
|
if skip == pkg.Name {
|
|
continue PYPI
|
|
}
|
|
}
|
|
remotePkgs = append(remotePkgs, pkg.PyPiName())
|
|
}
|
|
}
|
|
}
|
|
type depList struct {
|
|
name string
|
|
packages []string
|
|
}
|
|
dbrDepsName := "remote cluster"
|
|
dependencyLists := []depList{
|
|
{dbrDepsName, remotePkgs},
|
|
{"install_requires", d.InstallRequires},
|
|
{"tests_require", d.TestsRequire},
|
|
}
|
|
for _, deps := range dependencyLists {
|
|
for _, dep := range deps.packages {
|
|
if env.Has(dep) {
|
|
continue
|
|
}
|
|
status(fmt.Sprintf("Installing %s in virtualenv (%s)", dep, deps.name))
|
|
_, err = s.Pip(ctx, "install", "--prefer-binary", dep)
|
|
if err != nil && deps.name == dbrDepsName &&
|
|
strings.Contains(err.Error(), "Could not find a version") {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("%s: %w", dep, err)
|
|
}
|
|
// repeatedly run pip freeze so that we potentially have less installs
|
|
env, err = s.Freeze(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("pip freeze: %s", err)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var errNoCluster = errors.New("no development cluster")
|
|
|
|
func (s *SetupDotPy) runtimeInfo(ctx context.Context, prj flavor.Project,
|
|
status func(string)) (*dbr.RuntimeInfo, error) {
|
|
clusterId, err := prj.GetDevelopmentClusterId(ctx)
|
|
if err != nil && errors.As(err, &apierr.APIError{}) {
|
|
return nil, err
|
|
}
|
|
if err != nil {
|
|
return nil, errNoCluster
|
|
}
|
|
return dbr.GetRuntimeInfo(ctx, prj.WorkspacesClient(), clusterId, status)
|
|
}
|
|
|
|
func (s *SetupDotPy) Freeze(ctx context.Context) (Environment, error) {
|
|
out, err := s.Pip(ctx, "freeze")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
env := Environment{}
|
|
deps := strings.Split(out, "\n")
|
|
for _, raw := range deps {
|
|
env = append(env, DependencyFromSpec(raw))
|
|
}
|
|
return env, nil
|
|
}
|
|
|
|
func (s *SetupDotPy) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) {
|
|
dist, err := s.readDistribution(ctx, prj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
all := flavor.Artifacts{}
|
|
// install dependencies for the wheel to run
|
|
for _, dependency := range dist.InstallRequires {
|
|
if strings.HasPrefix(dependency, "pyspark") {
|
|
// pyspark will conflict with DBR
|
|
continue
|
|
}
|
|
all = append(all, flavor.Artifact{
|
|
Flavor: s,
|
|
Library: libraries.Library{
|
|
Pypi: &libraries.PythonPyPiLibrary{
|
|
Package: dependency,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
s.wheelName = dist.WheelName()
|
|
all = append(all, flavor.Artifact{
|
|
Flavor: s,
|
|
Library: libraries.Library{
|
|
Whl: fmt.Sprintf("%s/.databricks/dist/%s", prj.Root(), s.wheelName),
|
|
},
|
|
})
|
|
return all, nil
|
|
}
|
|
|
|
// Build creates a python wheel, while keeping project root in a clean state, removing the need
|
|
// to execute rm -fr dist build *.egg-info after each build
|
|
func (s *SetupDotPy) Build(ctx context.Context, prj flavor.Project, status func(string)) error {
|
|
status(fmt.Sprintf("Building %s", s.wheelName))
|
|
ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj)))
|
|
_, err := s.Py(ctx, "setup.py",
|
|
// see https://github.com/pypa/setuptools/blob/main/setuptools/_distutils/command/build.py#L23-L31
|
|
"build", "--build-lib=.databricks/build/lib", "--build-base=.databricks/build",
|
|
// see https://github.com/pypa/setuptools/blob/main/setuptools/command/egg_info.py#L167-L168
|
|
"egg_info", "--egg-base=.databricks",
|
|
// see https://github.com/pypa/wheel/blob/main/src/wheel/bdist_wheel.py#L140
|
|
"bdist_wheel", "--dist-dir=.databricks/dist")
|
|
return err
|
|
}
|
|
|
|
// Py calls project-specific Python interpreter from the virtual env from project root dir
|
|
func (s *SetupDotPy) Py(ctx context.Context, args ...string) (string, error) {
|
|
if s.venv == "" {
|
|
return "", fmt.Errorf("virtualenv not detected")
|
|
}
|
|
out, err := spawn.ExecAndPassErr(ctx, fmt.Sprintf("%s/bin/python3", s.venv), args...)
|
|
if err != nil {
|
|
// current error message chain is longer:
|
|
// failed to call {pyExec} __non_existing__.py: {pyExec}: can't open
|
|
// ... file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory"
|
|
// probably we'll need to make it shorter:
|
|
// can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory
|
|
return "", err
|
|
}
|
|
return strings.Trim(string(out), "\n\r"), nil
|
|
}
|
|
|
|
func (s *SetupDotPy) Pip(ctx context.Context, args ...string) (string, error) {
|
|
return s.Py(ctx, append([]string{"-m", "pip"}, args...)...)
|
|
}
|
|
|
|
// fastDetectVirtualEnv performs very quick detection, by running over top level directories only
|
|
func (s *SetupDotPy) fastDetectVirtualEnv(root string) (string, error) {
|
|
wdf, err := os.Open(root)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
files, err := wdf.ReadDir(0)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
// virtual env is most likely in dot-directory
|
|
slices.SortFunc(files, func(a, b fs.DirEntry) bool {
|
|
return a.Name() < b.Name()
|
|
})
|
|
for _, v := range files {
|
|
if !v.IsDir() {
|
|
continue
|
|
}
|
|
candidate := fmt.Sprintf("%s/%s", root, v.Name())
|
|
_, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate))
|
|
if errors.Is(err, os.ErrNotExist) {
|
|
continue
|
|
}
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return candidate, nil
|
|
}
|
|
return "", nil
|
|
}
|