From b85e63684bf342707e067dd12d6faed692aca94f Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Fri, 20 May 2022 21:40:03 +0200 Subject: [PATCH] added essential Python metadata detection --- .gitignore | 5 +- go.mod | 1 + project/config.go | 2 +- project/project.go | 11 +- python/env.go | 90 +++++++++++ python/env_test.go | 24 +++ .../test/simple-python-wheel/setup.py | 2 +- python/runner.go | 142 ++++++++++++++++++ python/runner_test.go | 80 ++++++++++ python/wheel.go | 74 +++++++++ python/wheel_test.go | 24 +++ 11 files changed, 449 insertions(+), 6 deletions(-) create mode 100644 python/env.go create mode 100644 python/env_test.go create mode 100644 python/runner.go create mode 100644 python/runner_test.go create mode 100644 python/wheel.go create mode 100644 python/wheel_test.go diff --git a/.gitignore b/.gitignore index 2af3ee7f..8bbb04e0 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,7 @@ vendor/ dist/ *.log -coverage.txt \ No newline at end of file +coverage.txt + +__pycache__ +*.pyc \ No newline at end of file diff --git a/go.mod b/go.mod index d15b76a1..f725137e 100644 --- a/go.mod +++ b/go.mod @@ -11,4 +11,5 @@ require ( github.com/spf13/cobra v1.4.0 // Apache 2.0 github.com/stretchr/testify v1.7.1 // MIT github.com/whilp/git-urls v1.0.0 // MIT + golang.org/x/mod v0.5.1 // BSD-3-Clause ) diff --git a/project/config.go b/project/config.go index 48331219..1256f136 100644 --- a/project/config.go +++ b/project/config.go @@ -37,7 +37,7 @@ type Project struct { Profile string `json:"profile,omitempty"` // rename? 
Isolation Isolation `json:"isolation,omitempty"` - // TODO: turn to pointer for the easy YAML marshalling + // development-time vs deployment-time resources DevCluster *clusters.Cluster `json:"dev_cluster,omitempty"` // Assertions defines a list of configurations expected to be applied diff --git a/project/project.go b/project/project.go index 67d7154e..38221f04 100644 --- a/project/project.go +++ b/project/project.go @@ -11,7 +11,7 @@ import ( "github.com/databrickslabs/terraform-provider-databricks/scim" ) -// Current CLI application state +// Current CLI application state - figure out var Current inner type inner struct { @@ -27,7 +27,7 @@ func (i *inner) init() { i.mu.Lock() defer i.mu.Unlock() i.once.Do(func() { - client := common.CommonEnvironmentClient() + client := &common.DatabricksClient{} client.WithCommandExecutor(func( ctx context.Context, c *common.DatabricksClient) common.CommandExecutor { return commands.NewCommandsAPI(ctx, c) @@ -37,7 +37,12 @@ func (i *inner) init() { if err != nil { panic(err) } - client.Profile = prj.Profile + client.Profile = prj.Profile // Databricks CLI profile + err = client.Configure() + if err != nil { + panic(err) + } + i.project = &prj }) } diff --git a/python/env.go b/python/env.go new file mode 100644 index 00000000..8aec6131 --- /dev/null +++ b/python/env.go @@ -0,0 +1,90 @@ +package python + +import ( + "context" + "encoding/json" + "fmt" + "log" + "strings" + + "golang.org/x/mod/semver" +) + +type Dependency struct { + Name string + Operator string + Version string + Location string // @ file:///usr/loca +} + +func (d Dependency) CanonicalVersion() string { + return semver.Canonical(fmt.Sprintf("v%s", d.Version)) +} + +type Environment []Dependency + +func (e Environment) Has(name string) bool { + for _, d := range e { + if d.Name == name { + return true + } + } + return false +} + +func Freeze(ctx context.Context) (Environment, error) { + out, err := Py(ctx, "-m", "pip", "freeze") + if err != nil { + return nil, 
err + } + env := Environment{} + deps := strings.Split(out, "\n") + for _, raw := range deps { + env = append(env, DependencyFromSpec(raw)) + } + return env, nil +} + +func DependencyFromSpec(raw string) (d Dependency) { + // TODO: write a normal parser for this + rawSplit := strings.Split(raw, "==") + if len(rawSplit) != 2 { + log.Printf("[DEBUG] Skipping invalid dep: %s", raw) + return + } + d.Name = rawSplit[0] + d.Operator = "==" + d.Version = rawSplit[1] + return +} + +type Distribution struct { + Name string `json:"name"` + Version string `json:"version"` + Packages []string `json:"packages"` + InstallRequires []string `json:"install_requires,omitempty"` +} + +// InstallEnvironment returns only direct install dependencies +func (d Distribution) InstallEnvironment() (env Environment) { + for _, raw := range d.InstallRequires { + env = append(env, DependencyFromSpec(raw)) + } + return +} + +// ReadDistribution "parses" metadata from setup.py file. +func ReadDistribution(ctx context.Context) (d Distribution, err error) { + out, err := PyInline(ctx, ` + import setuptools, json, sys + setup_config = {} # actual args for setuptools.dist.Distribution + def capture(**kwargs): global setup_config; setup_config = kwargs + setuptools.setup = capture + import setup + json.dump(setup_config, sys.stdout)`) + if err != nil { + return + } + err = json.Unmarshal([]byte(out), &d) + return +} diff --git a/python/env_test.go b/python/env_test.go new file mode 100644 index 00000000..59071aed --- /dev/null +++ b/python/env_test.go @@ -0,0 +1,24 @@ +package python + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFreeze(t *testing.T) { + env, err := Freeze(context.Background()) + assert.NoError(t, err) + assert.Greater(t, len(env), 1) + assert.True(t, env.Has("urllib3")) +} + +func TestPyInlineX(t *testing.T) { + defer chdirAndBack("internal/test/simple-python-wheel")() + dist, err := ReadDistribution(context.Background()) + 
assert.NoError(t, err) + assert.Equal(t, "dummy", dist.Name) + assert.Equal(t, "dummy", dist.Packages[0]) + assert.True(t, dist.InstallEnvironment().Has("requests")) +} diff --git a/python/internal/test/simple-python-wheel/setup.py b/python/internal/test/simple-python-wheel/setup.py index 1e05fafa..7e2f3709 100644 --- a/python/internal/test/simple-python-wheel/setup.py +++ b/python/internal/test/simple-python-wheel/setup.py @@ -4,5 +4,5 @@ setup( name='dummy', version='0.0.1', packages=find_packages(exclude=['tests', 'tests.*']), - install_requires=[] + install_requires=['requests==2.27.1'] ) diff --git a/python/runner.go b/python/runner.go new file mode 100644 index 00000000..6145da27 --- /dev/null +++ b/python/runner.go @@ -0,0 +1,142 @@ +package python + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "runtime" + "strings" +) + +func PyInline(ctx context.Context, inlinePy string) (string, error) { + return Py(ctx, "-c", TrimLeadingWhitespace(inlinePy)) +} + +func Py(ctx context.Context, script string, args ...string) (string, error) { + py, err := detectExecutable(ctx) + if err != nil { + return "", err + } + out, err := execAndPassErr(ctx, py, append([]string{script}, args...)...) + if err != nil { + // current error message chain is longer: + // failed to call {pyExec} __non_existing__.py: {pyExec}: can't open + // ... 
file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory" + // probably we'll need to make it shorter: + // can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory + return "", err + } + return trimmedS(out), nil +} + +func createVirtualEnv(ctx context.Context) error { + _, err := Py(context.Background(), "-m", "venv", ".venv") + return err +} + +// python3 -m build -w +// https://packaging.python.org/en/latest/tutorials/packaging-projects/ +func detectVirtualEnv() (string, error) { + wd, err := os.Getwd() + if err != nil { + return "", err + } + wdf, err := os.Open(wd) + if err != nil { + return "", err + } + files, err := wdf.ReadDir(0) + if err != nil { + return "", err + } + for _, v := range files { + if !v.IsDir() { + continue + } + candidate := fmt.Sprintf("%s/%s", wd, v.Name()) + _, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate)) + if errors.Is(err, os.ErrNotExist) { + continue + } + if err != nil { + return "", err + } + return candidate, nil + } + return "", nil +} + +var pyExec string + +func detectExecutable(ctx context.Context) (string, error) { + if pyExec != "" { + return pyExec, nil + } + detector := "which" + if runtime.GOOS == "windows" { + detector = "where.exe" + } + out, err := execAndPassErr(ctx, detector, "python3") + if err != nil { + return "", err + } + pyExec = trimmedS(out) + return pyExec, nil +} + +func execAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) { + // TODO: move out to a separate package, once we have Maven integration + out, err := exec.CommandContext(ctx, name, args...).Output() + return out, nicerErr(err) +} + +func nicerErr(err error) error { + if err == nil { + return nil + } + if ee, ok := err.(*exec.ExitError); ok { + errMsg := trimmedS(ee.Stderr) + if errMsg == "" { + errMsg = err.Error() + } + return errors.New(errMsg) + } + return err +} + +func trimmedS(bytes []byte) string { + return strings.Trim(string(bytes), "\n\r") +} + +// 
TrimLeadingWhitespace removes leading whitespace +// function copied from Databricks Terraform provider +func TrimLeadingWhitespace(commandStr string) (newCommand string) { + lines := strings.Split(strings.ReplaceAll(commandStr, "\t", " "), "\n") + leadingWhitespace := 1<<31 - 1 + for _, line := range lines { + for pos, char := range line { + if char == ' ' || char == '\t' { + continue + } + // first non-whitespace character + if pos < leadingWhitespace { + leadingWhitespace = pos + } + // is not needed further + break + } + } + for i := 0; i < len(lines); i++ { + if lines[i] == "" || strings.Trim(lines[i], " \t") == "" { + continue + } + if len(lines[i]) < leadingWhitespace { + newCommand += lines[i] + "\n" // or not.. + } else { + newCommand += lines[i][leadingWhitespace:] + "\n" + } + } + return +} diff --git a/python/runner_test.go b/python/runner_test.go new file mode 100644 index 00000000..6eea2abd --- /dev/null +++ b/python/runner_test.go @@ -0,0 +1,80 @@ +package python + +import ( + "context" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestExecAndPassError(t *testing.T) { + _, err := execAndPassErr(context.Background(), "which", "__non_existing__") + assert.EqualError(t, err, "exit status 1") +} + +func TestDetectPython(t *testing.T) { + pyExec = "" + py, err := detectExecutable(context.Background()) + assert.NoError(t, err) + assert.Contains(t, py, "python3") +} + +func TestDetectPythonCache(t *testing.T) { + pyExec = "abc" + py, err := detectExecutable(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "abc", py) + pyExec = "" +} + +func TestDetectVirtualEnvFalse(t *testing.T) { + venvDir, err := detectVirtualEnv() + assert.NoError(t, err) + assert.Equal(t, "", venvDir) +} + +func TestMakeDetectableVenv(t *testing.T) { + var temp string + defer testTempdir(t, &temp)() + + // TODO: rewrite with t.TempDir() and arguments + err := createVirtualEnv(context.Background()) + assert.NoError(t, err) + + venv, err 
:= detectVirtualEnv() + assert.NoError(t, err) + assert.Equal(t, fmt.Sprintf("%s/.venv", temp), venv) +} + +func testTempdir(t *testing.T, dir *string) func() { + wd, _ := os.Getwd() + temp, err := os.MkdirTemp(os.TempDir(), "brickstest") + assert.NoError(t, err) + os.Chdir(temp) + wd2, _ := os.Getwd() + *dir = wd2 + return func() { + os.Chdir(wd) + os.RemoveAll(temp) + } +} + +func TestPyError(t *testing.T) { + _, err := Py(context.Background(), "__non_existing__.py") + assert.Contains(t, err.Error(), "can't open file") +} + +func TestPyInline(t *testing.T) { + hello, err := PyInline(context.Background(), "print('Hello, world!')") + assert.NoError(t, err) + assert.Equal(t, "Hello, world!", hello) +} + +func TestPyInlineStderr(t *testing.T) { + detectExecutable(context.Background()) + inline := "import sys; sys.stderr.write('___msg___'); sys.exit(1)" + _, err := PyInline(context.Background(), inline) + assert.EqualError(t, err, "___msg___") +} diff --git a/python/wheel.go b/python/wheel.go new file mode 100644 index 00000000..2870d270 --- /dev/null +++ b/python/wheel.go @@ -0,0 +1,74 @@ +package python + +import ( + "context" + "fmt" + "log" + "os" + "path" + "strings" +) + +func BuildWheel(ctx context.Context, dir string) (string, error) { + defer chdirAndBack(dir)() + // remove previous dist leak + os.RemoveAll("dist") + // remove all other irrelevant traces + silentlyCleanupWheelFolder(".") + + // call simple wheel builder. 
we may need to pip install wheel as well + out, err := Py(ctx, "setup.py", "bdist_wheel") + if err != nil { + return "", err + } + log.Printf("[DEBUG] Built wheel: %s", out) + + // and cleanup afterwards + silentlyCleanupWheelFolder(".") + + wheel := silentChildWithSuffix("dist", ".whl") + if wheel == "" { + return "", fmt.Errorf("cannot find built wheel in %s", dir) + } + return path.Join(dir, wheel), nil +} + +func silentlyCleanupWheelFolder(dir string) { + // there or not there - we don't care + os.RemoveAll(path.Join(dir, "__pycache__")) + os.RemoveAll(path.Join(dir, "build")) + eggInfo := silentChildWithSuffix(dir, ".egg-info") + if eggInfo == "" { + return + } + os.RemoveAll(eggInfo) +} + +func silentChildWithSuffix(dir, suffix string) string { + f, err := os.Open(dir) + if err != nil { + log.Printf("[DEBUG] open dir %s: %s", dir, err) + return "" + } + entries, err := f.ReadDir(0) + if err != nil { + log.Printf("[DEBUG] read dir %s: %s", dir, err) + // todo: log + return "" + } + for _, child := range entries { + if !strings.HasSuffix(child.Name(), suffix) { + continue + } + return path.Join(dir, child.Name()) + } + return "" +} + +func chdirAndBack(dir string) func() { + wd, _ := os.Getwd() + os.Chdir(dir) + return func() { + os.Chdir(wd) + } +} diff --git a/python/wheel_test.go b/python/wheel_test.go new file mode 100644 index 00000000..b1f5c2cf --- /dev/null +++ b/python/wheel_test.go @@ -0,0 +1,24 @@ +package python + +import ( + "context" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWheel(t *testing.T) { + wheel, err := BuildWheel(context.Background(), "internal/test/simple-python-wheel") + assert.NoError(t, err) + assert.Equal(t, "internal/test/simple-python-wheel/dist/dummy-0.0.1-py3-none-any.whl", wheel) + + noFile(t, "internal/test/simple-python-wheel/dummy.egg-info") + noFile(t, "internal/test/simple-python-wheel/__pycache__") + noFile(t, "internal/test/simple-python-wheel/build") +} + +func noFile(t *testing.T, name 
string) { + _, err := os.Stat(name) + assert.Error(t, err, "file %s should not exist", name) +} \ No newline at end of file