added essential Python metadata detection

This commit is contained in:
Serge Smertin 2022-05-20 21:40:03 +02:00
parent ae2dc104f9
commit b85e63684b
11 changed files with 449 additions and 6 deletions

3
.gitignore vendored
View File

@ -18,3 +18,6 @@ dist/
*.log *.log
coverage.txt coverage.txt
__pycache__
*.pyc

1
go.mod
View File

@ -11,4 +11,5 @@ require (
github.com/spf13/cobra v1.4.0 // Apache 2.0 github.com/spf13/cobra v1.4.0 // Apache 2.0
github.com/stretchr/testify v1.7.1 // MIT github.com/stretchr/testify v1.7.1 // MIT
github.com/whilp/git-urls v1.0.0 // MIT github.com/whilp/git-urls v1.0.0 // MIT
golang.org/x/mod v0.5.1 // BSD-3-Clause
) )

View File

@ -37,7 +37,7 @@ type Project struct {
Profile string `json:"profile,omitempty"` // rename? Profile string `json:"profile,omitempty"` // rename?
Isolation Isolation `json:"isolation,omitempty"` Isolation Isolation `json:"isolation,omitempty"`
// TODO: turn to pointer for the easy YAML marshalling // development-time vs deployment-time resources
DevCluster *clusters.Cluster `json:"dev_cluster,omitempty"` DevCluster *clusters.Cluster `json:"dev_cluster,omitempty"`
// Assertions defines a list of configurations expected to be applied // Assertions defines a list of configurations expected to be applied

View File

@ -11,7 +11,7 @@ import (
"github.com/databrickslabs/terraform-provider-databricks/scim" "github.com/databrickslabs/terraform-provider-databricks/scim"
) )
// Current CLI application state // Current CLI application state - figure out
var Current inner var Current inner
type inner struct { type inner struct {
@ -27,7 +27,7 @@ func (i *inner) init() {
i.mu.Lock() i.mu.Lock()
defer i.mu.Unlock() defer i.mu.Unlock()
i.once.Do(func() { i.once.Do(func() {
client := common.CommonEnvironmentClient() client := &common.DatabricksClient{}
client.WithCommandExecutor(func( client.WithCommandExecutor(func(
ctx context.Context, c *common.DatabricksClient) common.CommandExecutor { ctx context.Context, c *common.DatabricksClient) common.CommandExecutor {
return commands.NewCommandsAPI(ctx, c) return commands.NewCommandsAPI(ctx, c)
@ -37,7 +37,12 @@ func (i *inner) init() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
client.Profile = prj.Profile client.Profile = prj.Profile // Databricks CLI profile
err = client.Configure()
if err != nil {
panic(err)
}
i.project = &prj i.project = &prj
}) })
} }

90
python/env.go Normal file
View File

@ -0,0 +1,90 @@
package python
import (
"context"
"encoding/json"
"fmt"
"log"
"strings"
"golang.org/x/mod/semver"
)
// Dependency describes a single Python package requirement.
// DependencyFromSpec fills Name, Operator and Version from a "name==version" spec.
type Dependency struct {
// Name is the part of the spec that precedes the operator.
Name string
// Operator is the version comparison operator; DependencyFromSpec only ever sets "==".
Operator string
// Version is the part of the spec that follows the operator.
// CanonicalVersion prepends "v" to it before normalizing.
Version string
// Location is not populated by the code visible here; presumably intended for
// "name @ file:///..." style specs - TODO confirm.
Location string // @ file:///usr/loca
}
// CanonicalVersion prefixes the dependency version with "v" and returns
// whatever semver.Canonical produces for that string.
func (d Dependency) CanonicalVersion() string {
	prefixed := fmt.Sprintf("v%s", d.Version)
	return semver.Canonical(prefixed)
}
// Environment is a list of dependencies.
type Environment []Dependency

// Has reports whether any dependency in the environment has exactly the given name.
func (e Environment) Has(name string) bool {
	for i := range e {
		if e[i].Name == name {
			return true
		}
	}
	return false
}
// Freeze runs `pip freeze` through Py and converts every output line that
// DependencyFromSpec understands into a Dependency.
//
// Blank lines and lines that cannot be parsed are left out of the result, so
// the returned Environment never contains zero-valued entries. Lines are
// trimmed first so that CRLF output does not leak "\r" into versions.
// The returned Environment is non-nil on success; the error is the one
// reported by Py.
func Freeze(ctx context.Context) (Environment, error) {
	out, err := Py(ctx, "-m", "pip", "freeze")
	if err != nil {
		return nil, err
	}
	env := Environment{}
	for _, raw := range strings.Split(out, "\n") {
		raw = strings.TrimSpace(raw)
		if raw == "" {
			continue
		}
		dep := DependencyFromSpec(raw)
		if dep.Name == "" {
			// DependencyFromSpec has already logged why the line was rejected
			continue
		}
		env = append(env, dep)
	}
	return env, nil
}
// DependencyFromSpec parses a "name==version" requirement spec.
//
// Surrounding whitespace (including a trailing "\r") is ignored around the
// whole spec, the name and the version. Anything that does not consist of
// exactly one "==" with a non-empty name and version is logged and yields
// the zero Dependency, which callers can detect through an empty Name.
func DependencyFromSpec(raw string) (d Dependency) {
	// TODO: write a normal parser for this
	parts := strings.Split(strings.TrimSpace(raw), "==")
	if len(parts) != 2 {
		log.Printf("[DEBUG] Skipping invalid dep: %s", raw)
		return Dependency{}
	}
	name := strings.TrimSpace(parts[0])
	version := strings.TrimSpace(parts[1])
	if name == "" || version == "" {
		log.Printf("[DEBUG] Skipping invalid dep: %s", raw)
		return Dependency{}
	}
	d.Name = name
	d.Operator = "=="
	d.Version = version
	return d
}
// Distribution holds the setup() keyword arguments that ReadDistribution
// decodes from the JSON emitted by its inline Python script.
type Distribution struct {
// Name is decoded from the "name" key.
Name string `json:"name"`
// Version is decoded from the "version" key.
Version string `json:"version"`
// Packages is decoded from the "packages" key.
Packages []string `json:"packages"`
// InstallRequires is decoded from the "install_requires" key; each entry is
// handed to DependencyFromSpec by InstallEnvironment.
InstallRequires []string `json:"install_requires,omitempty"`
}
// InstallEnvironment returns only direct install dependencies.
//
// Every InstallRequires entry is parsed with DependencyFromSpec; entries it
// rejects (reported through an empty Name) are skipped rather than added as
// zero-valued dependencies. The result is nil when nothing could be parsed.
func (d Distribution) InstallEnvironment() (env Environment) {
	for _, raw := range d.InstallRequires {
		dep := DependencyFromSpec(raw)
		if dep.Name == "" {
			continue
		}
		env = append(env, dep)
	}
	return env
}
// ReadDistribution "parses" metadata from setup.py file.
//
// The inline script swaps setuptools.setup for a function that records its
// keyword arguments, imports the setup module and dumps the recorded
// arguments to stdout as JSON, which is then decoded into a Distribution.
func ReadDistribution(ctx context.Context) (d Distribution, err error) {
	const captureSetupArgs = `
import setuptools, json, sys
setup_config = {} # actual args for setuptools.dist.Distribution
def capture(**kwargs): global setup_config; setup_config = kwargs
setuptools.setup = capture
import setup
json.dump(setup_config, sys.stdout)`
	var dumped string
	dumped, err = PyInline(ctx, captureSetupArgs)
	if err != nil {
		return d, err
	}
	err = json.Unmarshal([]byte(dumped), &d)
	return d, err
}

24
python/env_test.go Normal file
View File

@ -0,0 +1,24 @@
package python
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
// TestFreeze checks that the frozen environment has more than one entry and
// includes urllib3.
func TestFreeze(t *testing.T) {
	ctx := context.Background()
	frozen, err := Freeze(ctx)
	assert.NoError(t, err)
	assert.Greater(t, len(frozen), 1)
	assert.True(t, frozen.Has("urllib3"))
}
// TestPyInlineX reads the distribution metadata of the sample wheel project
// and checks its name, first package and the requests dependency.
func TestPyInlineX(t *testing.T) {
	restore := chdirAndBack("internal/test/simple-python-wheel")
	defer restore()
	ctx := context.Background()
	dist, err := ReadDistribution(ctx)
	assert.NoError(t, err)
	assert.Equal(t, "dummy", dist.Name)
	assert.Equal(t, "dummy", dist.Packages[0])
	installEnv := dist.InstallEnvironment()
	assert.True(t, installEnv.Has("requests"))
}

View File

@ -4,5 +4,5 @@ setup(
name='dummy', name='dummy',
version='0.0.1', version='0.0.1',
packages=find_packages(exclude=['tests', 'tests.*']), packages=find_packages(exclude=['tests', 'tests.*']),
install_requires=[] install_requires=['requests==2.27.1']
) )

142
python/runner.go Normal file
View File

@ -0,0 +1,142 @@
package python
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"runtime"
"strings"
)
func PyInline(ctx context.Context, inlinePy string) (string, error) {
return Py(ctx, "-c", TrimLeadingWhitespace(inlinePy))
}
// Py runs the detected Python executable with script followed by args and
// returns its standard output with surrounding line breaks trimmed.
func Py(ctx context.Context, script string, args ...string) (string, error) {
	interpreter, err := detectExecutable(ctx)
	if err != nil {
		return "", err
	}
	argv := make([]string, 0, len(args)+1)
	argv = append(argv, script)
	argv = append(argv, args...)
	stdout, err := execAndPassErr(ctx, interpreter, argv...)
	if err != nil {
		// the error chain is currently rather long:
		// failed to call {pyExec} __non_existing__.py: {pyExec}: can't open
		// ... file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory"
		// and may need shortening to:
		// can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory
		return "", err
	}
	return trimmedS(stdout), nil
}
// createVirtualEnv runs `python -m venv .venv` in the current working
// directory. The caller's ctx is forwarded so that cancellation and
// deadlines apply to the spawned process.
func createVirtualEnv(ctx context.Context) error {
	_, err := Py(ctx, "-m", "venv", ".venv")
	return err
}
// python3 -m build -w
// https://packaging.python.org/en/latest/tutorials/packaging-projects/
// detectVirtualEnv scans the immediate subdirectories of the current working
// directory and returns the path of the first one that contains a
// pyvenv.cfg file. It returns an empty string and a nil error when no such
// directory exists.
func detectVirtualEnv() (string, error) {
	wd, err := os.Getwd()
	if err != nil {
		return "", err
	}
	wdf, err := os.Open(wd)
	if err != nil {
		return "", err
	}
	// release the directory handle once the listing has been read
	defer wdf.Close()
	files, err := wdf.ReadDir(0)
	if err != nil {
		return "", err
	}
	for _, v := range files {
		if !v.IsDir() {
			continue
		}
		candidate := fmt.Sprintf("%s/%s", wd, v.Name())
		_, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate))
		if errors.Is(err, os.ErrNotExist) {
			// not a virtual environment, keep looking
			continue
		}
		if err != nil {
			return "", err
		}
		return candidate, nil
	}
	return "", nil
}
// pyExec caches the interpreter path found by detectExecutable.
var pyExec string

// detectExecutable returns the path of the python3 executable, looking it up
// with `which` (or `where.exe` on Windows) on first use and caching the
// result in pyExec afterwards.
//
// where.exe prints every match on its own line, so only the first line of
// the lookup output is kept; otherwise the cached "path" could contain
// several paths separated by line breaks.
func detectExecutable(ctx context.Context) (string, error) {
	if pyExec != "" {
		return pyExec, nil
	}
	detector := "which"
	if runtime.GOOS == "windows" {
		detector = "where.exe"
	}
	out, err := execAndPassErr(ctx, detector, "python3")
	if err != nil {
		return "", err
	}
	found := trimmedS(out)
	if i := strings.IndexAny(found, "\r\n"); i >= 0 {
		found = found[:i]
	}
	pyExec = found
	return pyExec, nil
}
// execAndPassErr runs the named command with args, returning its standard
// output together with the error as rewritten by nicerErr.
func execAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) {
	// TODO: move out to a separate package, once we have Maven integration
	cmd := exec.CommandContext(ctx, name, args...)
	stdout, runErr := cmd.Output()
	return stdout, nicerErr(runErr)
}
// nicerErr replaces a process exit error with an error whose message is the
// trimmed standard error output of that process, falling back to the
// original message when nothing was written to stderr. Any other error, and
// nil, is returned unchanged.
//
// errors.As is used instead of a type assertion so that an *exec.ExitError
// wrapped by another error is recognized as well.
func nicerErr(err error) error {
	if err == nil {
		return nil
	}
	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) {
		return err
	}
	errMsg := trimmedS(exitErr.Stderr)
	if errMsg == "" {
		errMsg = err.Error()
	}
	return errors.New(errMsg)
}
// trimmedS converts bytes to a string and strips leading and trailing
// "\n" and "\r" characters; other whitespace is preserved.
func trimmedS(bytes []byte) string {
	const lineBreaks = "\n\r"
	text := string(bytes)
	return strings.Trim(text, lineBreaks)
}
// TrimLeadingWhitespace removes the indentation shared by all non-blank
// lines of commandStr. Every tab is first replaced by a single space, blank
// lines are dropped, and each remaining line is terminated with "\n".
// Function copied from Databricks Terraform provider.
func TrimLeadingWhitespace(commandStr string) (newCommand string) {
	isContent := func(r rune) bool { return r != ' ' && r != '\t' }
	normalized := strings.ReplaceAll(commandStr, "\t", " ")
	lines := strings.Split(normalized, "\n")

	// smallest offset of the first non-whitespace character over all lines;
	// stays negative when every line is blank
	indent := -1
	for _, line := range lines {
		first := strings.IndexFunc(line, isContent)
		if first < 0 {
			continue
		}
		if indent < 0 || first < indent {
			indent = first
		}
	}

	var sb strings.Builder
	for _, line := range lines {
		if strings.Trim(line, " \t") == "" {
			continue
		}
		// a non-blank line is always at least indent+1 bytes long
		sb.WriteString(line[indent:])
		sb.WriteByte('\n')
	}
	return sb.String()
}

80
python/runner_test.go Normal file
View File

@ -0,0 +1,80 @@
package python
import (
"context"
"fmt"
"os"
"testing"
"github.com/stretchr/testify/assert"
)
// TestExecAndPassError checks the error reported when `which` cannot find
// the requested program.
func TestExecAndPassError(t *testing.T) {
	ctx := context.Background()
	_, err := execAndPassErr(ctx, "which", "__non_existing__")
	assert.EqualError(t, err, "exit status 1")
}
// TestDetectPython clears the cached interpreter and checks that a fresh
// lookup yields a path containing "python3".
func TestDetectPython(t *testing.T) {
	pyExec = ""
	ctx := context.Background()
	found, err := detectExecutable(ctx)
	assert.NoError(t, err)
	assert.Contains(t, found, "python3")
}
// TestDetectPythonCache checks that a pre-set pyExec is returned as is, then
// clears the cache again.
func TestDetectPythonCache(t *testing.T) {
	const cached = "abc"
	pyExec = cached
	found, err := detectExecutable(context.Background())
	assert.NoError(t, err)
	assert.Equal(t, cached, found)
	pyExec = ""
}
// TestDetectVirtualEnvFalse expects no virtual environment to be detected in
// the directory the tests run from.
func TestDetectVirtualEnvFalse(t *testing.T) {
	detected, err := detectVirtualEnv()
	assert.NoError(t, err)
	assert.Equal(t, "", detected)
}
// TestMakeDetectableVenv creates a virtual environment inside a temporary
// working directory and expects detectVirtualEnv to find it.
func TestMakeDetectableVenv(t *testing.T) {
	var tempDir string
	cleanup := testTempdir(t, &tempDir)
	defer cleanup()
	// TODO: rewrite with t.TempDir() and arguments
	assert.NoError(t, createVirtualEnv(context.Background()))
	detected, err := detectVirtualEnv()
	assert.NoError(t, err)
	expected := fmt.Sprintf("%s/.venv", tempDir)
	assert.Equal(t, expected, detected)
}
// testTempdir creates a fresh temporary directory, makes it the working
// directory and stores the resulting working directory path in *dir.
// The returned function switches back to the previous working directory and
// removes the temporary one.
//
// Any failure during setup aborts the test immediately, so that the caller
// never continues in an unexpected working directory.
func testTempdir(t *testing.T, dir *string) func() {
	t.Helper()
	wd, err := os.Getwd()
	if err != nil {
		t.Fatalf("get working directory: %s", err)
	}
	temp, err := os.MkdirTemp(os.TempDir(), "brickstest")
	if err != nil {
		t.Fatalf("create temp dir: %s", err)
	}
	if err := os.Chdir(temp); err != nil {
		os.RemoveAll(temp)
		t.Fatalf("enter temp dir %s: %s", temp, err)
	}
	// ask the OS again: the reported working directory may differ from temp
	// textually
	wd2, err := os.Getwd()
	if err != nil {
		os.Chdir(wd)
		os.RemoveAll(temp)
		t.Fatalf("get working directory: %s", err)
	}
	*dir = wd2
	return func() {
		if err := os.Chdir(wd); err != nil {
			t.Errorf("restore working directory %s: %s", wd, err)
		}
		if err := os.RemoveAll(temp); err != nil {
			t.Logf("remove temp dir %s: %s", temp, err)
		}
	}
}
// TestPyError checks that running a missing script reports Python's
// "can't open file" message. The message is only inspected when an error
// was actually returned, so an unexpected success fails the test instead of
// panicking on a nil error.
func TestPyError(t *testing.T) {
	_, err := Py(context.Background(), "__non_existing__.py")
	if assert.Error(t, err) {
		assert.Contains(t, err.Error(), "can't open file")
	}
}
// TestPyInline runs a one-line print statement and checks its trimmed output.
func TestPyInline(t *testing.T) {
	ctx := context.Background()
	output, err := PyInline(ctx, "print('Hello, world!')")
	assert.NoError(t, err)
	assert.Equal(t, "Hello, world!", output)
}
// TestPyInlineStderr checks that the stderr output of a failing inline
// script becomes the error message.
func TestPyInlineStderr(t *testing.T) {
	ctx := context.Background()
	// result intentionally unused: the call only runs interpreter detection
	_, _ = detectExecutable(ctx)
	const script = "import sys; sys.stderr.write('___msg___'); sys.exit(1)"
	_, err := PyInline(ctx, script)
	assert.EqualError(t, err, "___msg___")
}

74
python/wheel.go Normal file
View File

@ -0,0 +1,74 @@
package python
import (
"context"
"fmt"
"log"
"os"
"path"
"strings"
)
func BuildWheel(ctx context.Context, dir string) (string, error) {
defer chdirAndBack(dir)()
// remove previous dist leak
os.RemoveAll("dist")
// remove all other irrelevant traces
silentlyCleanupWheelFolder(".")
// call simple wheel builder. we may need to pip install wheel as well
out, err := Py(ctx, "setup.py", "bdist_wheel")
if err != nil {
return "", err
}
log.Printf("[DEBUG] Built wheel: %s", out)
// and cleanup afterwards
silentlyCleanupWheelFolder(".")
wheel := silentChildWithSuffix("dist", ".whl")
if wheel == "" {
return "", fmt.Errorf("cannot find built wheel in %s", dir)
}
return path.Join(dir, wheel), nil
}
// silentlyCleanupWheelFolder removes the __pycache__ and build folders of
// dir, plus the first child whose name ends in .egg-info, ignoring any
// removal errors.
func silentlyCleanupWheelFolder(dir string) {
	// there or not there - we don't care
	for _, leftover := range []string{"__pycache__", "build"} {
		os.RemoveAll(path.Join(dir, leftover))
	}
	if eggInfo := silentChildWithSuffix(dir, ".egg-info"); eggInfo != "" {
		os.RemoveAll(eggInfo)
	}
}
// silentChildWithSuffix returns dir joined with the name of the first entry
// of dir whose name ends in suffix. It returns an empty string when there is
// no such entry or when dir cannot be opened or read; those failures are
// only logged.
func silentChildWithSuffix(dir, suffix string) string {
	f, err := os.Open(dir)
	if err != nil {
		log.Printf("[DEBUG] open dir %s: %s", dir, err)
		return ""
	}
	// release the directory handle once the listing has been read
	defer f.Close()
	entries, err := f.ReadDir(0)
	if err != nil {
		log.Printf("[DEBUG] read dir %s: %s", dir, err)
		return ""
	}
	for _, child := range entries {
		if !strings.HasSuffix(child.Name(), suffix) {
			continue
		}
		return path.Join(dir, child.Name())
	}
	return ""
}
// chdirAndBack switches the working directory to dir and returns a function
// that switches back to the directory that was current before the call.
//
// The signature leaves no room for returning an error, so failures to read,
// change or restore the working directory are logged instead of being
// dropped silently.
func chdirAndBack(dir string) func() {
	wd, err := os.Getwd()
	if err != nil {
		log.Printf("[WARN] get working directory: %s", err)
	}
	if err := os.Chdir(dir); err != nil {
		log.Printf("[WARN] change directory to %s: %s", dir, err)
	}
	return func() {
		if err := os.Chdir(wd); err != nil {
			log.Printf("[WARN] restore working directory %s: %s", wd, err)
		}
	}
}

24
python/wheel_test.go Normal file
View File

@ -0,0 +1,24 @@
package python
import (
"context"
"os"
"testing"
"github.com/stretchr/testify/assert"
)
// TestWheel builds the sample project and checks both the returned wheel
// path and that no build leftovers remain in the project folder.
func TestWheel(t *testing.T) {
	const project = "internal/test/simple-python-wheel"
	wheel, err := BuildWheel(context.Background(), project)
	assert.NoError(t, err)
	assert.Equal(t, project+"/dist/dummy-0.0.1-py3-none-any.whl", wheel)
	for _, leftover := range []string{"dummy.egg-info", "__pycache__", "build"} {
		noFile(t, project+"/"+leftover)
	}
}
// noFile asserts that os.Stat fails for name, i.e. that the file or folder
// is absent. The failure message states that expectation.
func noFile(t *testing.T, name string) {
	_, err := os.Stat(name)
	assert.Error(t, err, "file %s should not exist", name)
}