From 7d0f170eee721ff5d8f1d98c4c515d1a77fbbf90 Mon Sep 17 00:00:00 2001 From: Serge Smertin <259697+nfx@users.noreply.github.com> Date: Tue, 3 Oct 2023 10:47:09 +0000 Subject: [PATCH] Added `python.DetectInterpreters` and other utils (#805) This PR adds a few utilities related to Python interpreter detection: - `python.DetectInterpreters` to detect all Python versions available in `$PATH` by executing every matched binary name with `--version` flag. - `python.DetectVirtualEnvPath` to detect if there's any child virtual environment in `src` directory - `python.DetectExecutable` to detect if there's python3 installed either by `which python3` command or by calling `python.DetectInterpreters().AtLeast("v3.8")` To be merged after https://github.com/databricks/cli/pull/804, as one of the steps to get https://github.com/databricks/cli/pull/637 in, as previously discussed. --- bundle/artifacts/whl/build.go | 2 +- bundle/artifacts/whl/infer.go | 2 +- libs/python/detect.go | 34 +++ libs/python/detect_unix_test.go | 39 ++++ libs/python/detect_win_test.go | 24 ++ libs/python/interpreters.go | 216 ++++++++++++++++++ libs/python/interpreters_unix_test.go | 95 ++++++++ libs/python/interpreters_win_test.go | 28 +++ libs/python/testdata/no-python3/python | 6 + libs/python/testdata/no-python3/python3.6 | 3 + libs/python/testdata/no-python3/pythonw | 5 + .../testdata/other-binaries-filtered/python | 6 + .../other-binaries-filtered/python3-whatever | 4 + .../other-binaries-filtered/python3.10 | 3 + .../other-binaries-filtered/python3.10.100 | 3 + .../other-binaries-filtered/python3.11 | 1 + .../other-binaries-filtered/python4.8 | 1 + .../testdata/other-binaries-filtered/python5 | 5 + .../testdata/other-binaries-filtered/python6 | 3 + .../testdata/other-binaries-filtered/python7 | 4 + .../testdata/other-binaries-filtered/pythonw | 5 + .../other-binaries-filtered/real-python3.11.4 | 3 + .../testdata/other-binaries-filtered/whatever | 4 + .../some-dir-with-venv/.venv/pyvenv.cfg | 8 + 
.../testdata/some-dir-with-venv/__main__.py | 2 + .../python/testdata/world-writeable/python8.4 | 3 + {python => libs/python}/utils.go | 2 - {python => libs/python}/utils_test.go | 0 libs/python/venv.go | 35 +++ libs/python/venv_test.go | 33 +++ python/env.go | 101 -------- python/env_test.go | 41 ---- python/runner.go | 149 ------------ python/runner_test.go | 94 -------- .../simple-python-wheel/databricks.yml | 4 - .../simple-python-wheel/dummy/__init__.py | 0 .../simple-python-wheel/dummy/transforms.py | 1 - python/testdata/simple-python-wheel/setup.py | 8 - python/wheel.go | 93 -------- python/wheel_test.go | 40 ---- 40 files changed, 575 insertions(+), 535 deletions(-) create mode 100644 libs/python/detect.go create mode 100644 libs/python/detect_unix_test.go create mode 100644 libs/python/detect_win_test.go create mode 100644 libs/python/interpreters.go create mode 100644 libs/python/interpreters_unix_test.go create mode 100644 libs/python/interpreters_win_test.go create mode 100755 libs/python/testdata/no-python3/python create mode 100755 libs/python/testdata/no-python3/python3.6 create mode 100755 libs/python/testdata/no-python3/pythonw create mode 100755 libs/python/testdata/other-binaries-filtered/python create mode 100755 libs/python/testdata/other-binaries-filtered/python3-whatever create mode 100755 libs/python/testdata/other-binaries-filtered/python3.10 create mode 100755 libs/python/testdata/other-binaries-filtered/python3.10.100 create mode 120000 libs/python/testdata/other-binaries-filtered/python3.11 create mode 120000 libs/python/testdata/other-binaries-filtered/python4.8 create mode 100755 libs/python/testdata/other-binaries-filtered/python5 create mode 100755 libs/python/testdata/other-binaries-filtered/python6 create mode 100755 libs/python/testdata/other-binaries-filtered/python7 create mode 100755 libs/python/testdata/other-binaries-filtered/pythonw create mode 100755 libs/python/testdata/other-binaries-filtered/real-python3.11.4 create mode 
100755 libs/python/testdata/other-binaries-filtered/whatever create mode 100644 libs/python/testdata/some-dir-with-venv/.venv/pyvenv.cfg create mode 100644 libs/python/testdata/some-dir-with-venv/__main__.py create mode 100755 libs/python/testdata/world-writeable/python8.4 rename {python => libs/python}/utils.go (95%) rename {python => libs/python}/utils_test.go (100%) create mode 100644 libs/python/venv.go create mode 100644 libs/python/venv_test.go delete mode 100644 python/env.go delete mode 100644 python/env_test.go delete mode 100644 python/runner.go delete mode 100644 python/runner_test.go delete mode 100644 python/testdata/simple-python-wheel/databricks.yml delete mode 100644 python/testdata/simple-python-wheel/dummy/__init__.py delete mode 100644 python/testdata/simple-python-wheel/dummy/transforms.py delete mode 100644 python/testdata/simple-python-wheel/setup.py delete mode 100644 python/wheel.go delete mode 100644 python/wheel_test.go diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go index 4565a4c8..6ebc925f 100644 --- a/bundle/artifacts/whl/build.go +++ b/bundle/artifacts/whl/build.go @@ -9,7 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/cmdio" - "github.com/databricks/cli/python" + "github.com/databricks/cli/libs/python" ) type build struct { diff --git a/bundle/artifacts/whl/infer.go b/bundle/artifacts/whl/infer.go index 518d926c..1c0e9857 100644 --- a/bundle/artifacts/whl/infer.go +++ b/bundle/artifacts/whl/infer.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/python" + "github.com/databricks/cli/libs/python" ) type infer struct { diff --git a/libs/python/detect.go b/libs/python/detect.go new file mode 100644 index 00000000..b0c1475c --- /dev/null +++ b/libs/python/detect.go @@ -0,0 +1,34 @@ +package python + +import ( + "context" + "errors" + "os/exec" +) + +func DetectExecutable(ctx 
context.Context) (string, error) { + // TODO: add a shortcut if .python-version file is detected somewhere in + // the parent directory tree. + // + // See https://github.com/pyenv/pyenv#understanding-python-version-selection + out, err := exec.LookPath("python3") + // most of the OS'es have python3 in $PATH, but for those which don't, + // we perform the latest version lookup + if err != nil && !errors.Is(err, exec.ErrNotFound) { + return "", err + } + if out != "" { + return out, nil + } + // otherwise, detect all interpreters and pick the least that satisfies + // minimal version requirements + all, err := DetectInterpreters(ctx) + if err != nil { + return "", err + } + interpreter, err := all.AtLeast("3.8") + if err != nil { + return "", err + } + return interpreter.Path, nil +} diff --git a/libs/python/detect_unix_test.go b/libs/python/detect_unix_test.go new file mode 100644 index 00000000..a962e1f5 --- /dev/null +++ b/libs/python/detect_unix_test.go @@ -0,0 +1,39 @@ +//go:build unix + +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectsViaPathLookup(t *testing.T) { + ctx := context.Background() + py, err := DetectExecutable(ctx) + assert.NoError(t, err) + assert.NotEmpty(t, py) +} + +func TestDetectsViaListing(t *testing.T) { + t.Setenv("PATH", "testdata/other-binaries-filtered") + ctx := context.Background() + py, err := DetectExecutable(ctx) + assert.NoError(t, err) + assert.Equal(t, "testdata/other-binaries-filtered/python3.10", py) +} + +func TestDetectFailsNoInterpreters(t *testing.T) { + t.Setenv("PATH", "testdata") + ctx := context.Background() + _, err := DetectExecutable(ctx) + assert.Equal(t, ErrNoPythonInterpreters, err) +} + +func TestDetectFailsNoMinimalVersion(t *testing.T) { + t.Setenv("PATH", "testdata/no-python3") + ctx := context.Background() + _, err := DetectExecutable(ctx) + assert.EqualError(t, err, "cannot find Python greater or equal to v3.8.0") +} diff --git 
a/libs/python/detect_win_test.go b/libs/python/detect_win_test.go new file mode 100644 index 00000000..2ef811a4 --- /dev/null +++ b/libs/python/detect_win_test.go @@ -0,0 +1,24 @@ +//go:build windows + +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectsViaPathLookup(t *testing.T) { + ctx := context.Background() + py, err := DetectExecutable(ctx) + assert.NoError(t, err) + assert.NotEmpty(t, py) +} + +func TestDetectFailsNoInterpreters(t *testing.T) { + t.Setenv("PATH", "testdata") + ctx := context.Background() + _, err := DetectExecutable(ctx) + assert.ErrorIs(t, err, ErrNoPythonInterpreters) +} diff --git a/libs/python/interpreters.go b/libs/python/interpreters.go new file mode 100644 index 00000000..94f5074d --- /dev/null +++ b/libs/python/interpreters.go @@ -0,0 +1,216 @@ +package python + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "golang.org/x/mod/semver" +) + +var ErrNoPythonInterpreters = errors.New("no python3 interpreters found") + +const officialMswinPython = "(Python Official) https://python.org/downloads/windows" +const microsoftStorePython = "(Microsoft Store) https://apps.microsoft.com/store/search?publisher=Python%20Software%20Foundation" + +const worldWriteable = 0o002 + +type Interpreter struct { + Version string + Path string +} + +func (i Interpreter) String() string { + return fmt.Sprintf("%s (%s)", i.Version, i.Path) +} + +type allInterpreters []Interpreter + +func (a allInterpreters) Latest() Interpreter { + return a[len(a)-1] +} + +func (a allInterpreters) AtLeast(minimalVersion string) (*Interpreter, error) { + canonicalMinimalVersion := semver.Canonical("v" + strings.TrimPrefix(minimalVersion, "v")) + if canonicalMinimalVersion == "" { + return nil, fmt.Errorf("invalid SemVer: %s", minimalVersion) + } + for _, interpreter := 
range a { + cmp := semver.Compare(interpreter.Version, canonicalMinimalVersion) + if cmp < 0 { + continue + } + return &interpreter, nil + } + return nil, fmt.Errorf("cannot find Python greater or equal to %s", canonicalMinimalVersion) +} + +func DetectInterpreters(ctx context.Context) (allInterpreters, error) { + found := allInterpreters{} + seen := map[string]bool{} + executables, err := pythonicExecutablesFromPathEnvironment(ctx) + if err != nil { + return nil, err + } + log.Debugf(ctx, "found %d potential alternative Python versions in $PATH", len(executables)) + for _, resolved := range executables { + if seen[resolved] { + continue + } + seen[resolved] = true + // probe the binary version by executing it, like `python --version` + // and parsing the output. + // + // Keep in mind, that mswin installations get python.exe and pythonw.exe, + // which are slightly different: see https://stackoverflow.com/a/30313091 + out, err := process.Background(ctx, []string{resolved, "--version"}) + var processErr *process.ProcessError + if errors.As(err, &processErr) { + log.Debugf(ctx, "failed to check version for %s: %s", resolved, processErr.Err) + continue + } + if err != nil { + log.Debugf(ctx, "failed to check version for %s: %s", resolved, err) + continue + } + version := validPythonVersion(ctx, resolved, out) + if version == "" { + continue + } + found = append(found, Interpreter{ + Version: version, + Path: resolved, + }) + } + if runtime.GOOS == "windows" && len(found) == 0 { + return nil, fmt.Errorf("%w. 
Install them from %s or %s and restart the shell", + ErrNoPythonInterpreters, officialMswinPython, microsoftStorePython) + } + if len(found) == 0 { + return nil, ErrNoPythonInterpreters + } + sort.Slice(found, func(i, j int) bool { + a := found[i].Version + b := found[j].Version + cmp := semver.Compare(a, b) + if cmp != 0 { + return cmp < 0 + } + return a < b + }) + return found, nil +} + +func pythonicExecutablesFromPathEnvironment(ctx context.Context) (out []string, err error) { + paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) + for _, prefix := range paths { + info, err := os.Stat(prefix) + if errors.Is(err, fs.ErrNotExist) { + // some directories in $PATH may not exist + continue + } + if errors.Is(err, fs.ErrPermission) { + // some directories we cannot list + continue + } + if err != nil { + return nil, fmt.Errorf("stat %s: %w", prefix, err) + } + if !info.IsDir() { + continue + } + perm := info.Mode().Perm() + if runtime.GOOS != "windows" && perm&worldWriteable != 0 { + // we try not to run any python binary that sits in a folder writable by all users. + // this is mainly to avoid breaking the security model on a multi-user system. + // If the PATH is pointing somewhere untrusted it is the user's fault, but we can + // help here. 
+ // + // See https://github.com/databricks/cli/pull/805#issuecomment-1735403952 + log.Debugf(ctx, "%s is world-writeable (%s), skipping for security reasons", prefix, perm) + continue + } + entries, err := os.ReadDir(prefix) + if errors.Is(err, fs.ErrPermission) { + // some directories we cannot list + continue + } + if err != nil { + return nil, fmt.Errorf("listing %s: %w", prefix, err) + } + for _, v := range entries { + if v.IsDir() { + continue + } + if strings.Contains(v.Name(), "-") { + // skip python3-config, python3.10-config, etc + continue + } + // If Python3 is installed on Windows through GUI installer app that was + // downloaded from https://python.org/downloads/windows, it may appear + // in $PATH as `python`, even though it means Python 2.7 in all other + // operating systems (macOS, Linux). + // + // See https://github.com/databrickslabs/ucx/issues/281 + if !strings.HasPrefix(v.Name(), "python") { + continue + } + bin := filepath.Join(prefix, v.Name()) + resolved, err := filepath.EvalSymlinks(bin) + if err != nil { + log.Debugf(ctx, "cannot resolve symlink for %s: %s", bin, resolved) + continue + } + out = append(out, resolved) + } + } + return out, nil +} + +func validPythonVersion(ctx context.Context, resolved, out string) string { + out = strings.TrimSpace(out) + log.Debugf(ctx, "%s --version: %s", resolved, out) + + words := strings.Split(out, " ") + // The Python distribution from the Windows Store is available in $PATH as `python.exe` + // and `python3.exe`, even though it symlinks to a real file packaged with some versions of Windows: + // /c/Program Files/WindowsApps/Microsoft.DesktopAppInstaller_.../AppInstallerPythonRedirector.exe. + // Executing the `python` command from this distribution opens the Windows Store, allowing users to + // download and install Python. Once installed, it replaces the `python.exe` and `python3.exe` stub + // with the genuine Python executable. 
Additionally, once user installs from the main installer at + // https://python.org/downloads/windows, it does not replace this stub. + // + // However, a drawback is that if this initial stub is run with any command line arguments, it quietly + // fails to execute. According to https://github.com/databrickslabs/ucx/issues/281, it can be + // detected by seeing just the "Python" output without any version info from the `python --version` + // command execution. + // + // See https://github.com/pypa/packaging-problems/issues/379 + // See https://bugs.python.org/issue41327 + if len(words) < 2 { + log.Debugf(ctx, "%s --version: stub from Windows Store", resolved) + return "" + } + + if words[0] != "Python" { + log.Debugf(ctx, "%s --version: not a Python", resolved) + return "" + } + + lastWord := words[len(words)-1] + version := semver.Canonical("v" + lastWord) + if version == "" { + log.Debugf(ctx, "%s --version: invalid SemVer: %s", resolved, lastWord) + return "" + } + + return version +} diff --git a/libs/python/interpreters_unix_test.go b/libs/python/interpreters_unix_test.go new file mode 100644 index 00000000..e2b0a5a1 --- /dev/null +++ b/libs/python/interpreters_unix_test.go @@ -0,0 +1,95 @@ +//go:build unix + +package python + +import ( + "context" + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAtLeastOnePythonInstalled(t *testing.T) { + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + a := all.Latest() + t.Logf("latest is: %s", a) + assert.True(t, len(all) > 0) +} + +func TestNoInterpretersFound(t *testing.T) { + t.Setenv("PATH", t.TempDir()) + + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.Nil(t, all) + assert.Equal(t, ErrNoPythonInterpreters, err) +} + +func TestFilteringInterpreters(t *testing.T) { + rogueBin := filepath.Join(t.TempDir(), "rogue-bin") + err := os.Mkdir(rogueBin, 0o777) + assert.NoError(t, err) + 
os.Chmod(rogueBin, 0o777) + + raw, err := os.ReadFile("testdata/world-writeable/python8.4") + assert.NoError(t, err) + + injectedBinary := filepath.Join(rogueBin, "python8.4") + err = os.WriteFile(injectedBinary, raw, 00777) + assert.NoError(t, err) + + t.Setenv("PATH", "testdata/other-binaries-filtered:"+rogueBin) + + roguePath, err := exec.LookPath("python8.4") + assert.NoError(t, err) + assert.Equal(t, injectedBinary, roguePath) + + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + assert.Len(t, all, 3) + assert.Equal(t, "v2.7.18", all[0].Version) + assert.Equal(t, "v3.10.5", all[1].Version) + assert.Equal(t, "testdata/other-binaries-filtered/python3.10", all[1].Path) + assert.Equal(t, "v3.11.4", all[2].Version) + assert.Equal(t, "testdata/other-binaries-filtered/real-python3.11.4", all[2].Path) +} + +func TestInterpretersAtLeastInvalidSemver(t *testing.T) { + t.Setenv("PATH", "testdata/other-binaries-filtered") + + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + + _, err = all.AtLeast("v1.2.3.4") + assert.EqualError(t, err, "invalid SemVer: v1.2.3.4") +} + +func TestInterpretersAtLeast(t *testing.T) { + t.Setenv("PATH", "testdata/other-binaries-filtered") + + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + + interpreter, err := all.AtLeast("3.10") + assert.NoError(t, err) + assert.Equal(t, "testdata/other-binaries-filtered/python3.10", interpreter.Path) +} + +func TestInterpretersAtLeastNotSatisfied(t *testing.T) { + t.Setenv("PATH", "testdata/other-binaries-filtered") + + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + + _, err = all.AtLeast("4.0.1") + assert.EqualError(t, err, "cannot find Python greater or equal to v4.0.1") +} diff --git a/libs/python/interpreters_win_test.go b/libs/python/interpreters_win_test.go new file mode 100644 index 00000000..f9998152 --- /dev/null +++ 
b/libs/python/interpreters_win_test.go @@ -0,0 +1,28 @@ +//go:build windows + +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAtLeastOnePythonInstalled(t *testing.T) { + ctx := context.Background() + all, err := DetectInterpreters(ctx) + assert.NoError(t, err) + a := all.Latest() + t.Logf("latest is: %s", a) + assert.True(t, len(all) > 0) +} + +func TestNoInterpretersFound(t *testing.T) { + t.Setenv("PATH", t.TempDir()) + + ctx := context.Background() + _, err := DetectInterpreters(ctx) + assert.ErrorIs(t, err, ErrNoPythonInterpreters) + assert.ErrorContains(t, err, "python.org/downloads") +} diff --git a/libs/python/testdata/no-python3/python b/libs/python/testdata/no-python3/python new file mode 100755 index 00000000..8a4d6f7f --- /dev/null +++ b/libs/python/testdata/no-python3/python @@ -0,0 +1,6 @@ +#!/bin/sh + +# this is an emulation of Windows App Store stub +>&2 echo "Python was not found; run without arguments to install from the Microsoft Store, ..." 
+ +echo "Python" diff --git a/libs/python/testdata/no-python3/python3.6 b/libs/python/testdata/no-python3/python3.6 new file mode 100755 index 00000000..1a1bfe6e --- /dev/null +++ b/libs/python/testdata/no-python3/python3.6 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Python 3.6.4" diff --git a/libs/python/testdata/no-python3/pythonw b/libs/python/testdata/no-python3/pythonw new file mode 100755 index 00000000..a0cd07d9 --- /dev/null +++ b/libs/python/testdata/no-python3/pythonw @@ -0,0 +1,5 @@ +#!/bin/sh + +# pythonw is a gui app for launching gui/no-ui-at-all scripts, +# when no console window is opened on Windows +echo "Python 2.7.18" diff --git a/libs/python/testdata/other-binaries-filtered/python b/libs/python/testdata/other-binaries-filtered/python new file mode 100755 index 00000000..8a4d6f7f --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python @@ -0,0 +1,6 @@ +#!/bin/sh + +# this is an emulation of Windows App Store stub +>&2 echo "Python was not found; run without arguments to install from the Microsoft Store, ..." + +echo "Python" diff --git a/libs/python/testdata/other-binaries-filtered/python3-whatever b/libs/python/testdata/other-binaries-filtered/python3-whatever new file mode 100755 index 00000000..a0ed54ac --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python3-whatever @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "Must not get executed!" 
+exit 1 diff --git a/libs/python/testdata/other-binaries-filtered/python3.10 b/libs/python/testdata/other-binaries-filtered/python3.10 new file mode 100755 index 00000000..060c051b --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python3.10 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Python 3.10.5" diff --git a/libs/python/testdata/other-binaries-filtered/python3.10.100 b/libs/python/testdata/other-binaries-filtered/python3.10.100 new file mode 100755 index 00000000..c47d0da1 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python3.10.100 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Python 3.a.b" diff --git a/libs/python/testdata/other-binaries-filtered/python3.11 b/libs/python/testdata/other-binaries-filtered/python3.11 new file mode 120000 index 00000000..311e1513 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python3.11 @@ -0,0 +1 @@ +real-python3.11.4 \ No newline at end of file diff --git a/libs/python/testdata/other-binaries-filtered/python4.8 b/libs/python/testdata/other-binaries-filtered/python4.8 new file mode 120000 index 00000000..86f59439 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python4.8 @@ -0,0 +1 @@ +python3-deleted \ No newline at end of file diff --git a/libs/python/testdata/other-binaries-filtered/python5 b/libs/python/testdata/other-binaries-filtered/python5 new file mode 100755 index 00000000..eb48a407 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python5 @@ -0,0 +1,5 @@ +#!/bin/sh + +# this is an emulation of Windows App Store stub + +echo "Python" diff --git a/libs/python/testdata/other-binaries-filtered/python6 b/libs/python/testdata/other-binaries-filtered/python6 new file mode 100755 index 00000000..4a6b64b2 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python6 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Snake v3.12.4" diff --git a/libs/python/testdata/other-binaries-filtered/python7 b/libs/python/testdata/other-binaries-filtered/python7 new file mode 
100755 index 00000000..242da116 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/python7 @@ -0,0 +1,4 @@ +#!/bin/sh + +>&2 echo "This version of Python does not exist" +exit 1 diff --git a/libs/python/testdata/other-binaries-filtered/pythonw b/libs/python/testdata/other-binaries-filtered/pythonw new file mode 100755 index 00000000..a0cd07d9 --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/pythonw @@ -0,0 +1,5 @@ +#!/bin/sh + +# pythonw is a gui app for launching gui/no-ui-at-all scripts, +# when no console window is opened on Windows +echo "Python 2.7.18" diff --git a/libs/python/testdata/other-binaries-filtered/real-python3.11.4 b/libs/python/testdata/other-binaries-filtered/real-python3.11.4 new file mode 100755 index 00000000..02cfa04c --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/real-python3.11.4 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Python 3.11.4" diff --git a/libs/python/testdata/other-binaries-filtered/whatever b/libs/python/testdata/other-binaries-filtered/whatever new file mode 100755 index 00000000..a0ed54ac --- /dev/null +++ b/libs/python/testdata/other-binaries-filtered/whatever @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "Must not get executed!" 
+exit 1 diff --git a/libs/python/testdata/some-dir-with-venv/.venv/pyvenv.cfg b/libs/python/testdata/some-dir-with-venv/.venv/pyvenv.cfg new file mode 100644 index 00000000..e2561203 --- /dev/null +++ b/libs/python/testdata/some-dir-with-venv/.venv/pyvenv.cfg @@ -0,0 +1,8 @@ +home = /opt/homebrew/opt/python@3.10/bin +implementation = CPython +version_info = 3.10.12.final.0 +virtualenv = 20.24.2 +include-system-site-packages = false +base-prefix = /opt/homebrew/opt/python@3.10/Frameworks/Python.framework/Versions/3.10 +base-exec-prefix = /opt/homebrew/opt/python@3.10/Frameworks/Python.framework/Versions/3.10 +base-executable = /opt/homebrew/opt/python@3.10/bin/python3.10 diff --git a/libs/python/testdata/some-dir-with-venv/__main__.py b/libs/python/testdata/some-dir-with-venv/__main__.py new file mode 100644 index 00000000..cace6aef --- /dev/null +++ b/libs/python/testdata/some-dir-with-venv/__main__.py @@ -0,0 +1,2 @@ +if __name__ == "__main__": + print(1) diff --git a/libs/python/testdata/world-writeable/python8.4 b/libs/python/testdata/world-writeable/python8.4 new file mode 100755 index 00000000..56ddc86a --- /dev/null +++ b/libs/python/testdata/world-writeable/python8.4 @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Python 8.4.10" diff --git a/python/utils.go b/libs/python/utils.go similarity index 95% rename from python/utils.go rename to libs/python/utils.go index 47d5462d..282775ff 100644 --- a/python/utils.go +++ b/libs/python/utils.go @@ -1,7 +1,5 @@ package python -// TODO: move this package into the libs - import ( "context" "os" diff --git a/python/utils_test.go b/libs/python/utils_test.go similarity index 100% rename from python/utils_test.go rename to libs/python/utils_test.go diff --git a/libs/python/venv.go b/libs/python/venv.go new file mode 100644 index 00000000..2af1bcdd --- /dev/null +++ b/libs/python/venv.go @@ -0,0 +1,35 @@ +package python + +import ( + "errors" + "os" + "path/filepath" +) + +var ErrNoVirtualEnvDetected = errors.New("no Python virtual 
environment detected") + +// DetectVirtualEnvPath scans direct subfolders in path to get a valid +// Virtual Environment installation, which is marked by a pyvenv.cfg file. +// +// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/ +func DetectVirtualEnvPath(path string) (string, error) { + files, err := os.ReadDir(path) + if err != nil { + return "", err + } + for _, v := range files { + if !v.IsDir() { + continue + } + candidate := filepath.Join(path, v.Name()) + _, err = os.Stat(filepath.Join(candidate, "pyvenv.cfg")) + if errors.Is(err, os.ErrNotExist) { + continue + } + if err != nil { + return "", err + } + return candidate, nil + } + return "", ErrNoVirtualEnvDetected +} diff --git a/libs/python/venv_test.go b/libs/python/venv_test.go new file mode 100644 index 00000000..2b3d94c3 --- /dev/null +++ b/libs/python/venv_test.go @@ -0,0 +1,33 @@ +package python + +import ( + "runtime" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectVirtualEnvPath_NoVirtualEnvDetected(t *testing.T) { + _, err := DetectVirtualEnvPath("testdata") + assert.Equal(t, ErrNoVirtualEnvDetected, err) +} + +func TestDetectVirtualEnvPath_invalid(t *testing.T) { + _, err := DetectVirtualEnvPath("testdata/__invalid__") + assert.Error(t, err) +} + +func TestDetectVirtualEnvPath_wrongDir(t *testing.T) { + _, err := DetectVirtualEnvPath("testdata/other-binaries-filtered") + assert.Error(t, err) +} + +func TestDetectVirtualEnvPath_happy(t *testing.T) { + venv, err := DetectVirtualEnvPath("testdata/some-dir-with-venv") + assert.NoError(t, err) + found := "testdata/some-dir-with-venv/.venv" + if runtime.GOOS == "windows" { + found = "testdata\\some-dir-with-venv\\.venv" + } + assert.Equal(t, found, venv) +} diff --git a/python/env.go b/python/env.go deleted file mode 100644 index 8a9e4330..00000000 --- a/python/env.go +++ /dev/null @@ -1,101 +0,0 @@ -package python - -import ( - "context" - "encoding/json" - "fmt" - "strings" - - 
"github.com/databricks/cli/libs/log" - "golang.org/x/mod/semver" -) - -type Dependency struct { - Name string - Operator string - Version string - Location string // @ file:///usr/loca -} - -func (d Dependency) CanonicalVersion() string { - return semver.Canonical(fmt.Sprintf("v%s", d.Version)) -} - -type Environment []Dependency - -func (e Environment) Has(name string) bool { - for _, d := range e { - if d.Name == name { - return true - } - } - return false -} - -func Freeze(ctx context.Context) (Environment, error) { - out, err := Py(ctx, "-m", "pip", "freeze") - if err != nil { - return nil, err - } - env := Environment{} - deps := strings.Split(out, "\n") - for _, raw := range deps { - env = append(env, DependencyFromSpec(raw)) - } - return env, nil -} - -func DependencyFromSpec(raw string) (d Dependency) { - // TODO: write a normal parser for this - rawSplit := strings.Split(raw, "==") - if len(rawSplit) != 2 { - log.Debugf(context.Background(), "Skipping invalid dep: %s", raw) - return - } - d.Name = rawSplit[0] - d.Operator = "==" - d.Version = rawSplit[1] - return -} - -// Distribution holds part of PEP426 metadata -// See https://peps.python.org/pep-0426/ -type Distribution struct { - Name string `json:"name"` - Version string `json:"version"` - Packages []string `json:"packages"` - InstallRequires []string `json:"install_requires,omitempty"` -} - -// InstallEnvironment returns only direct install dependencies -func (d Distribution) InstallEnvironment() (env Environment) { - for _, raw := range d.InstallRequires { - env = append(env, DependencyFromSpec(raw)) - } - return -} - -// NormalizedName returns PEP503-compatible Python Package Index project name. -// As per PEP 426 the only valid characters in a name are the ASCII alphabet, -// ASCII numbers, ., -, and _. The name should be lowercased with all runs of -// the characters ., -, or _ replaced with a single - character. 
-func (d Distribution) NormalizedName() string { - // TODO: implement https://peps.python.org/pep-0503/#normalized-names - return d.Name -} - -// ReadDistribution "parses" metadata from setup.py file. -func ReadDistribution(ctx context.Context) (d Distribution, err error) { - out, err := PyInline(ctx, ` - import setuptools, json, sys - setup_config = {} # actual args for setuptools.dist.Distribution - def capture(**kwargs): global setup_config; setup_config = kwargs - setuptools.setup = capture - import setup - json.dump(setup_config, sys.stdout)`) - if err != nil { - return - } - err = json.Unmarshal([]byte(out), &d) - return -} diff --git a/python/env_test.go b/python/env_test.go deleted file mode 100644 index 487e15b1..00000000 --- a/python/env_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package python - -import ( - "context" - "runtime" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestFreeze(t *testing.T) { - t.Skip("Skipping test until fixing Python installation on GitHub Windows environment") - - // remove this once equivalent tests for windows have been set up - // or this test has been fixed for windows - // date: 28 Nov 2022 - if runtime.GOOS == "windows" { - t.Skip("skipping temperorilty to make windows unit tests green") - } - - // remove this once equivalent tests for macos have been set up - // or this test has been fixed for mac os - // date: 28 Nov 2022 - if runtime.GOOS == "darwin" { - t.Skip("skipping temperorilty to make macos unit tests green") - } - - env, err := Freeze(context.Background()) - assert.NoError(t, err) - assert.Greater(t, len(env), 1) - assert.True(t, env.Has("urllib3")) -} - -func TestPyInlineX(t *testing.T) { - defer chdirAndBack("testdata/simple-python-wheel")() - dist, err := ReadDistribution(context.Background()) - assert.NoError(t, err) - assert.Equal(t, "dummy", dist.Name) - assert.Equal(t, "dummy", dist.Packages[0]) - assert.True(t, dist.InstallEnvironment().Has("requests")) -} diff --git a/python/runner.go 
b/python/runner.go deleted file mode 100644 index ebf24717..00000000 --- a/python/runner.go +++ /dev/null @@ -1,149 +0,0 @@ -package python - -import ( - "context" - "errors" - "fmt" - "os" - "os/exec" - "runtime" - "strings" - - "github.com/databricks/cli/libs/process" -) - -func PyInline(ctx context.Context, inlinePy string) (string, error) { - return Py(ctx, "-c", TrimLeadingWhitespace(inlinePy)) -} - -func Py(ctx context.Context, script string, args ...string) (string, error) { - py, err := DetectExecutable(ctx) - if err != nil { - return "", err - } - out, err := execAndPassErr(ctx, py, append([]string{script}, args...)...) - if err != nil { - // current error message chain is longer: - // failed to call {pyExec} __non_existing__.py: {pyExec}: can't open - // ... file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory" - // probably we'll need to make it shorter: - // can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory - return "", err - } - return trimmedS(out), nil -} - -func createVirtualEnv(ctx context.Context) error { - _, err := Py(context.Background(), "-m", "venv", ".venv") - return err -} - -// python3 -m build -w -// https://packaging.python.org/en/latest/tutorials/packaging-projects/ -func detectVirtualEnv() (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } - wdf, err := os.Open(wd) - if err != nil { - return "", err - } - files, err := wdf.ReadDir(0) - if err != nil { - return "", err - } - for _, v := range files { - if !v.IsDir() { - continue - } - candidate := fmt.Sprintf("%s/%s", wd, v.Name()) - _, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate)) - if errors.Is(err, os.ErrNotExist) { - continue - } - if err != nil { - return "", err - } - return candidate, nil - } - return "", nil -} - -var pyExec string - -func DetectExecutable(ctx context.Context) (string, error) { - if pyExec != "" { - return pyExec, nil - } - detector := "which" - if runtime.GOOS == "windows" { - 
detector = "where.exe" - } - out, err := execAndPassErr(ctx, detector, "python3") - if err != nil { - return "", err - } - pyExec = getFirstMatch(string(out)) - return pyExec, nil -} - -func execAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) { - // TODO: move out to a separate package, once we have Maven integration - out, err := process.Background(ctx, append([]string{name}, args...)) - return []byte(out), nicerErr(err) -} - -func getFirstMatch(out string) string { - res := strings.Split(out, "\n") - return strings.Trim(res[0], "\n\r") -} - -func nicerErr(err error) error { - if err == nil { - return nil - } - if ee, ok := err.(*exec.ExitError); ok { - errMsg := trimmedS(ee.Stderr) - if errMsg == "" { - errMsg = err.Error() - } - return errors.New(errMsg) - } - return err -} - -func trimmedS(bytes []byte) string { - return strings.Trim(string(bytes), "\n\r") -} - -// TrimLeadingWhitespace removes leading whitespace -// function copied from Databricks Terraform provider -func TrimLeadingWhitespace(commandStr string) (newCommand string) { - lines := strings.Split(strings.ReplaceAll(commandStr, "\t", " "), "\n") - leadingWhitespace := 1<<31 - 1 - for _, line := range lines { - for pos, char := range line { - if char == ' ' || char == '\t' { - continue - } - // first non-whitespace character - if pos < leadingWhitespace { - leadingWhitespace = pos - } - // is not needed further - break - } - } - for i := 0; i < len(lines); i++ { - if lines[i] == "" || strings.Trim(lines[i], " \t") == "" { - continue - } - if len(lines[i]) < leadingWhitespace { - newCommand += lines[i] + "\n" // or not.. 
- } else { - newCommand += lines[i][leadingWhitespace:] + "\n" - } - } - return -} diff --git a/python/runner_test.go b/python/runner_test.go deleted file mode 100644 index fc8f2508..00000000 --- a/python/runner_test.go +++ /dev/null @@ -1,94 +0,0 @@ -package python - -import ( - "context" - "fmt" - "os" - "runtime" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestExecAndPassError(t *testing.T) { - - // remove this once equivalent tests for windows have been set up - // or this test has been fixed for windows - // date: 28 Nov 2022 - if runtime.GOOS == "windows" { - t.Skip("skipping temperorilty to make windows unit tests green") - } - - _, err := execAndPassErr(context.Background(), "which", "__non_existing__") - assert.EqualError(t, err, "which __non_existing__: exit status 1") -} - -func TestDetectPython(t *testing.T) { - pyExec = "" - py, err := DetectExecutable(context.Background()) - assert.NoError(t, err) - assert.Contains(t, py, "python3") -} - -func TestDetectPythonCache(t *testing.T) { - pyExec = "abc" - py, err := DetectExecutable(context.Background()) - assert.NoError(t, err) - assert.Equal(t, "abc", py) - pyExec = "" -} - -func TestDetectVirtualEnvFalse(t *testing.T) { - venvDir, err := detectVirtualEnv() - assert.NoError(t, err) - assert.Equal(t, "", venvDir) -} - -func TestGetFirstMatch(t *testing.T) { - matches := "C:\\hostedtoolcache\\windows\\Python\\3.9.13\\x64\\python3.exe\r\nC:\\ProgramData\\Chocolatey\\bin\\python3.exe" - assert.Equal(t, getFirstMatch(matches), "C:\\hostedtoolcache\\windows\\Python\\3.9.13\\x64\\python3.exe") -} - -func TestMakeDetectableVenv(t *testing.T) { - var temp string - defer testTempdir(t, &temp)() - - // TODO: rewrite with t.TempDir() and arguments - err := createVirtualEnv(context.Background()) - assert.NoError(t, err) - - venv, err := detectVirtualEnv() - assert.NoError(t, err) - assert.Equal(t, fmt.Sprintf("%s/.venv", temp), venv) -} - -func testTempdir(t *testing.T, dir *string) func() { - wd, 
_ := os.Getwd() - temp, err := os.MkdirTemp(os.TempDir(), "brickstest") - assert.NoError(t, err) - os.Chdir(temp) - wd2, _ := os.Getwd() - *dir = wd2 - return func() { - os.Chdir(wd) - os.RemoveAll(temp) - } -} - -func TestPyError(t *testing.T) { - _, err := Py(context.Background(), "__non_existing__.py") - assert.Contains(t, err.Error(), "exit status 2") -} - -func TestPyInline(t *testing.T) { - hello, err := PyInline(context.Background(), "print('Hello, world!')") - assert.NoError(t, err) - assert.Equal(t, "Hello, world!", hello) -} - -func TestPyInlineStderr(t *testing.T) { - DetectExecutable(context.Background()) - inline := "import sys; sys.stderr.write('___msg___'); sys.exit(1)" - _, err := PyInline(context.Background(), inline) - assert.ErrorContains(t, err, "___msg___") -} diff --git a/python/testdata/simple-python-wheel/databricks.yml b/python/testdata/simple-python-wheel/databricks.yml deleted file mode 100644 index 3b8eb81f..00000000 --- a/python/testdata/simple-python-wheel/databricks.yml +++ /dev/null @@ -1,4 +0,0 @@ -name: dev -profile: demo -dev_cluster: - cluster_name: Shared Autoscaling \ No newline at end of file diff --git a/python/testdata/simple-python-wheel/dummy/__init__.py b/python/testdata/simple-python-wheel/dummy/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/python/testdata/simple-python-wheel/dummy/transforms.py b/python/testdata/simple-python-wheel/dummy/transforms.py deleted file mode 100644 index d61605cb..00000000 --- a/python/testdata/simple-python-wheel/dummy/transforms.py +++ /dev/null @@ -1 +0,0 @@ -def something(): return True \ No newline at end of file diff --git a/python/testdata/simple-python-wheel/setup.py b/python/testdata/simple-python-wheel/setup.py deleted file mode 100644 index 53d795bc..00000000 --- a/python/testdata/simple-python-wheel/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='dummy', - version='0.0.1', - 
packages=find_packages(exclude=['tests', 'tests.*']), - install_requires=['requests==2.31.1'] -) diff --git a/python/wheel.go b/python/wheel.go deleted file mode 100644 index 39c3d4cb..00000000 --- a/python/wheel.go +++ /dev/null @@ -1,93 +0,0 @@ -package python - -import ( - "context" - "fmt" - "io" - "os" - "path" - - "github.com/databricks/cli/libs/log" - "github.com/databricks/databricks-sdk-go" - "github.com/databricks/databricks-sdk-go/service/files" -) - -func BuildWheel(ctx context.Context, dir string) (string, error) { - defer chdirAndBack(dir)() - // remove previous dist leak - os.RemoveAll("dist") - // remove all other irrelevant traces - CleanupWheelFolder(".") - // call simple wheel builder. we may need to pip install wheel as well - out, err := Py(ctx, "setup.py", "bdist_wheel") - if err != nil { - return "", err - } - log.Debugf(ctx, "Built wheel: %s", out) - - // and cleanup afterwards - CleanupWheelFolder(".") - - wheels := FindFilesWithSuffixInPath("dist", ".whl") - if len(wheels) == 0 { - return "", fmt.Errorf("cannot find built wheel in %s", dir) - } - if len(wheels) != 1 { - return "", fmt.Errorf("more than 1 wheel file found in %s", dir) - } - return path.Join(dir, wheels[0]), nil -} - -const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple" - -// TODO: research deeper if we make new data resource for terraform, like `databricks_latest_wheel` (preferred), -// or do we bypass the environment variable into terraform deployer. And make a decision. -// -// Whatever this method gets refactored to is intended to be used for two purposes: -// - uploading project's wheel archives: one per project or one per project/developer, depending on isolation -// - synchronising enterprise artifactories, jfrogs, azdo feeds, so that we fix the gap of private code artifact -// repository integration. 
-func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error) { - wheel, err := BuildWheel(ctx, dir) - if err != nil { - return "", err - } - defer chdirAndBack(dir)() - dist, err := ReadDistribution(ctx) - if err != nil { - return "", err - } - // TODO: figure out wheel naming criteria for Soft project isolation to allow multiple - // people workin on the same project to upload wheels and let them be deployed as independent jobs. - // we should also consider multiple PEP503 index stacking: per enterprise, per project, per developer. - // PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates - // a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/.. - // extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035 - dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel)) - - wsc, err := databricks.NewWorkspaceClient(&databricks.Config{}) - if err != nil { - return "", err - } - wf, err := os.Open(wheel) - if err != nil { - return "", err - } - defer wf.Close() - h, err := wsc.Dbfs.Open(ctx, dbfsLoc, files.FileModeOverwrite|files.FileModeWrite) - if err != nil { - return "", err - } - _, err = io.Copy(h, wf) - // TODO: maintain PEP503 compliance and update meta-files: - // ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html - return dbfsLoc, err -} - -func chdirAndBack(dir string) func() { - wd, _ := os.Getwd() - os.Chdir(dir) - return func() { - os.Chdir(wd) - } -} diff --git a/python/wheel_test.go b/python/wheel_test.go deleted file mode 100644 index 5524dfb8..00000000 --- a/python/wheel_test.go +++ /dev/null @@ -1,40 +0,0 @@ -package python - -import ( - "context" - "os" - "runtime" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestWheel(t *testing.T) { - - // remove this once equivalent tests for windows have been set up - // or this test has been fixed 
for windows - // date: 28 Nov 2022 - if runtime.GOOS == "windows" { - t.Skip("skipping temperorilty to make windows unit tests green") - } - - // remove this once equivalent tests for macos have been set up - // or this test has been fixed for mac os - // date: 28 Nov 2022 - if runtime.GOOS == "darwin" { - t.Skip("skipping temperorilty to make macos unit tests green") - } - - wheel, err := BuildWheel(context.Background(), "testdata/simple-python-wheel") - assert.NoError(t, err) - assert.Equal(t, "testdata/simple-python-wheel/dist/dummy-0.0.1-py3-none-any.whl", wheel) - - noFile(t, "testdata/simple-python-wheel/dummy.egg-info") - noFile(t, "testdata/simple-python-wheel/__pycache__") - noFile(t, "testdata/simple-python-wheel/build") -} - -func noFile(t *testing.T, name string) { - _, err := os.Stat(name) - assert.Error(t, err, "file %s should exist", name) -}