Auto detect Python wheel packages and infer build command (#603)

This commit is contained in:
Andrew Nester 2023-07-26 12:07:26 +02:00 committed by GitHub
parent f0ad28ab62
commit cfff140815
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 332 additions and 19 deletions

View File

@ -41,7 +41,7 @@ jobs:
- name: Pull external libraries
run: |
make vendor
pip install wheel
pip3 install wheel
- name: Run tests
run: make test

View File

@ -0,0 +1,32 @@
package artifacts
import (
"context"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/artifacts/whl"
"github.com/databricks/cli/libs/log"
)
// DetectPackages returns the mutator that auto-detects buildable packages
// for a bundle.
func DetectPackages() bundle.Mutator {
	return new(autodetect)
}
// autodetect is the stateless mutator returned by DetectPackages.
type autodetect struct{}
// Name returns the identifier of this mutator.
func (m *autodetect) Name() string {
	const name = "artifacts.DetectPackages"
	return name
}
// Apply runs Python wheel package detection when the bundle configuration
// has no artifacts section. When the section is present it only logs a debug
// message and returns nil.
func (m *autodetect) Apply(ctx context.Context, b *bundle.Bundle) error {
	// An explicitly defined artifacts section disables auto-detection.
	if b.Config.Artifacts == nil {
		detectors := bundle.Seq(whl.DetectPackage())
		return bundle.Apply(ctx, b, detectors)
	}

	log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting")
	return nil
}

View File

@ -33,12 +33,13 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error {
return fmt.Errorf("artifact doesn't exist: %s", m.name)
}
if len(artifact.Files) == 0 && artifact.BuildCommand == "" {
return fmt.Errorf("artifact %s misconfigured: 'files' or 'build' property is required", m.name)
// Skip building if build command is not specified or inferred
if artifact.BuildCommand == "" {
// If no build command was specified or inferred and there are no
// artifact output files specified, the artifact is misconfigured
if len(artifact.Files) == 0 {
return fmt.Errorf("misconfigured artifact: please specify 'build' or 'files' property")
}
// If artifact file is explicitly defined, skip building the artifact
if len(artifact.Files) != 0 {
return nil
}

60
bundle/artifacts/infer.go Normal file
View File

@ -0,0 +1,60 @@
package artifacts
import (
"context"
"fmt"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/artifacts/whl"
"github.com/databricks/cli/bundle/config"
)
// inferMutators maps an artifact type to the factory that creates the
// mutator inferring missing properties for artifacts of that type.
var inferMutators = map[config.ArtifactType]mutatorFactory{
	config.ArtifactPythonWheel: whl.InferBuildCommand,
}
// getInferMutator returns the inference mutator registered for artifact type
// t, created for the artifact called name. It returns nil when no factory is
// registered for t.
func getInferMutator(t config.ArtifactType, name string) bundle.Mutator {
	if factory, found := inferMutators[t]; found {
		return factory(name)
	}
	return nil
}
// InferMissingProperties returns a mutator named "infer" that uses
// inferArtifactByName to create the per-artifact mutators.
// NOTE(review): the `all` type is declared outside this file; presumably it
// applies fn to every configured artifact — confirm against its definition.
func InferMissingProperties() bundle.Mutator {
	m := all{name: "infer", fn: inferArtifactByName}
	return &m
}
// inferArtifactByName creates the inference mutator for the artifact called
// name. The returned error is always nil.
func inferArtifactByName(name string) (bundle.Mutator, error) {
	m := &infer{name: name}
	return m, nil
}
// infer is a mutator that infers missing properties of a single artifact.
type infer struct {
	// name is the key of the artifact in the bundle's artifacts map.
	name string
}
// Name returns the mutator identifier, which embeds the artifact name.
func (m *infer) Name() string {
	return "artifacts.Infer(" + m.name + ")"
}
// Apply infers missing properties for the artifact this mutator was created
// for. It returns an error when the artifact is not present in the bundle
// configuration, and does nothing when a build command is already set or no
// inference mutator is registered for the artifact's type.
func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) error {
	artifact, found := b.Config.Artifacts[m.name]
	switch {
	case !found:
		return fmt.Errorf("artifact doesn't exist: %s", m.name)
	case artifact.BuildCommand != "":
		// An explicit build command leaves nothing to infer.
		return nil
	}

	if mutator := getInferMutator(artifact.Type, m.name); mutator != nil {
		return bundle.Apply(ctx, b, mutator)
	}
	return nil
}

View File

@ -0,0 +1,74 @@
package whl
import (
"context"
"fmt"
"os"
"path/filepath"
"regexp"
"time"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/cmdio"
)
// detectPkg is the stateless mutator returned by DetectPackage.
type detectPkg struct{}
// DetectPackage returns the mutator that looks for a Python wheel project
// (a setup.py file) in the bundle root.
func DetectPackage() bundle.Mutator {
	return new(detectPkg)
}
// Name returns the identifier of this mutator.
func (m *detectPkg) Name() string {
	const name = "artifacts.whl.AutoDetect"
	return name
}
// Apply checks whether the bundle root contains a setup.py file. If it does,
// it registers a Python wheel artifact keyed by the module name extracted
// from setup.py, with the absolute bundle root as its path. A setup.py that
// cannot be stat'ed is treated as "no project found" and is not an error.
func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) error {
	cmdio.LogString(ctx, "artifacts.whl.AutoDetect: Detecting Python wheel project...")

	// A wheel project is recognised by a setup.py file in the bundle root.
	root := b.Config.Path
	setupPy := filepath.Join(root, "setup.py")
	if _, statErr := os.Stat(setupPy); statErr != nil {
		cmdio.LogString(ctx, "artifacts.whl.AutoDetect: No Python wheel project found at bundle root folder")
		return nil
	}

	cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.AutoDetect: Found Python wheel project at %s", root))
	module := extractModuleName(setupPy)

	// Writing to a nil map panics, so create the map on first use.
	if b.Config.Artifacts == nil {
		b.Config.Artifacts = make(map[string]*config.Artifact)
	}

	absRoot, err := filepath.Abs(root)
	if err != nil {
		return err
	}

	b.Config.Artifacts[module] = &config.Artifact{
		Type: config.ArtifactPythonWheel,
		Path: absRoot,
	}
	return nil
}
// setupPyNameRe matches a `name` keyword argument with a quoted, non-empty
// value. The word boundary keeps it from matching inside identifiers such as
// `package_name`, whitespace is allowed around `=`, and the capture stops at
// the first closing quote so further arguments on the same line are ignored.
var setupPyNameRe = regexp.MustCompile(`\bname\s*=\s*['"]([^'"]+)['"]`)

// extractModuleName returns the package name declared in the setup.py file at
// the given path. If the file cannot be read or contains no quoted `name`
// argument, a generated fallback name is returned instead.
func extractModuleName(setupPy string) string {
	content, err := os.ReadFile(setupPy)
	if err != nil {
		return randomName()
	}

	matches := setupPyNameRe.FindSubmatch(content)
	if matches == nil {
		return randomName()
	}
	return string(matches[1])
}

// randomName returns a fallback artifact name made of the "artifact" prefix
// and the current Unix time in seconds.
func randomName() string {
	return fmt.Sprintf("artifact%d", time.Now().Unix())
}

View File

@ -0,0 +1,22 @@
package whl
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestExtractModuleName checks that the name is read from testdata/setup.py.
func TestExtractModuleName(t *testing.T) {
	const want = "my_test_code"
	got := extractModuleName("./testdata/setup.py")
	assert.Equal(t, want, got)
}
// TestExtractModuleNameMinimal checks that the name is read from
// testdata/setup_minimal.py.
func TestExtractModuleNameMinimal(t *testing.T) {
	const want = "my_test_code"
	got := extractModuleName("./testdata/setup_minimal.py")
	assert.Equal(t, want, got)
}
// TestExtractModuleNameIncorrect checks that a generated "artifact..." name
// is returned for testdata/setup_incorrect.py.
func TestExtractModuleNameIncorrect(t *testing.T) {
	got := extractModuleName("./testdata/setup_incorrect.py")
	assert.Contains(t, got, "artifact")
}

View File

@ -32,12 +32,6 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error {
return fmt.Errorf("artifact doesn't exist: %s", m.name)
}
// TODO: If not set, BuildCommand should be infer prior to this
// via a mutator so that it can be observable.
if artifact.BuildCommand == "" {
return fmt.Errorf("artifacts.whl.Build(%s): missing build property for the artifact", m.name)
}
cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Building...", m.name))
dir := artifact.Path

View File

@ -0,0 +1,34 @@
package whl
import (
"context"
"fmt"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/python"
)
// infer is a mutator that infers the build command of a single Python wheel
// artifact.
type infer struct {
	// name is the key of the artifact in the bundle's artifacts map.
	name string
}
// Apply sets the build command of the artifact this mutator was created for
// to "<python> setup.py bdist_wheel", where <python> is the executable
// reported by python.DetectExecutable.
//
// It returns an error when the artifact is not present in the bundle
// configuration or when no Python executable can be detected.
func (m *infer) Apply(ctx context.Context, b *bundle.Bundle) error {
	// Indexing a missing key yields a nil *config.Artifact; guard against it
	// so that a direct use of this mutator fails with an error, not a panic.
	artifact, ok := b.Config.Artifacts[m.name]
	if !ok || artifact == nil {
		return fmt.Errorf("artifact doesn't exist: %s", m.name)
	}

	py, err := python.DetectExecutable(ctx)
	if err != nil {
		return err
	}

	artifact.BuildCommand = fmt.Sprintf("%s setup.py bdist_wheel", py)
	return nil
}
// Name returns the mutator identifier, which embeds the artifact name.
func (m *infer) Name() string {
	return "artifacts.whl.Infer(" + m.name + ")"
}
// InferBuildCommand returns a mutator that infers the build command for the
// Python wheel artifact called name.
func InferBuildCommand(name string) bundle.Mutator {
	m := new(infer)
	m.name = name
	return m
}

15
bundle/artifacts/whl/testdata/setup.py vendored Normal file
View File

@ -0,0 +1,15 @@
# Test fixture: setuptools packaging script whose setup() call passes the
# package name as a double-quoted `name=` keyword argument.
from setuptools import setup, find_packages
import my_test_code
setup(
name="my_test_code",
version=my_test_code.__version__,
author=my_test_code.__author__,
url="https://databricks.com",
author_email="john.doe@databricks.com",
description="my test wheel",
packages=find_packages(include=["my_test_code"]),
entry_points={"group_1": "run=my_test_code.__main__:main"},
install_requires=["setuptools"],
)

View File

@ -0,0 +1,14 @@
# Test fixture: setuptools packaging script whose setup() call has no
# `name=` keyword argument.
from setuptools import setup, find_packages
import my_test_code
setup(
version=my_test_code.__version__,
author=my_test_code.__author__,
url="https://databricks.com",
author_email="john.doe@databricks.com",
description="my test wheel",
packages=find_packages(include=["my_test_code"]),
entry_points={"group_1": "run=my_test_code.__main__:main"},
install_requires=["setuptools"],
)

View File

@ -0,0 +1,3 @@
# Test fixture: setup() call that passes nothing but the `name=` keyword.
from setuptools import setup
setup(name="my_test_code")

View File

@ -11,6 +11,8 @@ func Build() bundle.Mutator {
return newPhase(
"build",
[]bundle.Mutator{
artifacts.DetectPackages(),
artifacts.InferMissingProperties(),
artifacts.BuildAll(),
interpolation.Interpolate(
interpolation.IncludeLookupsInPath("artifacts"),

View File

@ -5,7 +5,7 @@ artifacts:
my_test_code:
type: whl
path: "./my_test_code"
build: "python setup.py bdist_wheel"
build: "python3 setup.py bdist_wheel"
resources:
jobs:

View File

@ -0,0 +1,3 @@
build/
*.egg-info
.databricks

View File

@ -0,0 +1,13 @@
bundle:
name: python-wheel
resources:
jobs:
test_job:
name: "[${bundle.environment}] My Wheel Job"
tasks:
- task_key: TestTask
existing_cluster_id: "0717-aaaaa-bbbbbb"
python_wheel_task:
package_name: "my_test_code"
entry_point: "run"

View File

@ -0,0 +1,2 @@
__version__ = "0.0.1"
__author__ = "Databricks"

View File

@ -0,0 +1,16 @@
"""
The entry point of the Python Wheel
"""
import sys
# Entry point referenced by the wheel's "run" entry in setup.py.
def main():
# Print a greeting followed by the raw argument list (sys.argv)
print('Hello from my func')
print('Got arguments:')
print(sys.argv)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,15 @@
# Packaging script for the my_test_code wheel; the version and author are
# read from the my_test_code package itself.
from setuptools import setup, find_packages
import my_test_code
setup(
name="my_test_code",
version=my_test_code.__version__,
author=my_test_code.__author__,
url="https://databricks.com",
author_email="john.doe@databricks.com",
description="my test wheel",
packages=find_packages(include=["my_test_code"]),
entry_points={"group_1": "run=my_test_code.__main__:main"},
install_requires=["setuptools"],
)

View File

@ -22,3 +22,16 @@ func TestBundlePythonWheelBuild(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 1, len(matches))
}
// TestBundlePythonWheelBuildAutoDetect builds a bundle that has no artifacts
// section and checks that exactly one wheel was produced for the
// auto-detected Python project.
func TestBundlePythonWheelBuildAutoDetect(t *testing.T) {
	b, err := bundle.Load("./python_wheel_no_artifact")
	require.NoError(t, err)

	m := phases.Build()
	err = m.Apply(context.Background(), b)
	require.NoError(t, err)

	// The auto-detected artifact is rooted at the bundle directory, so the
	// wheel lands in that bundle's own dist folder. Globbing the other
	// fixture's dist folder would only observe output of a different test.
	matches, err := filepath.Glob("python_wheel_no_artifact/dist/my_test_code-*.whl")
	require.NoError(t, err)
	require.Equal(t, 1, len(matches))
}

View File

@ -15,7 +15,7 @@ func PyInline(ctx context.Context, inlinePy string) (string, error) {
}
func Py(ctx context.Context, script string, args ...string) (string, error) {
py, err := detectExecutable(ctx)
py, err := DetectExecutable(ctx)
if err != nil {
return "", err
}
@ -70,7 +70,7 @@ func detectVirtualEnv() (string, error) {
var pyExec string
func detectExecutable(ctx context.Context) (string, error) {
func DetectExecutable(ctx context.Context) (string, error) {
if pyExec != "" {
return pyExec, nil
}

View File

@ -25,14 +25,14 @@ func TestExecAndPassError(t *testing.T) {
func TestDetectPython(t *testing.T) {
pyExec = ""
py, err := detectExecutable(context.Background())
py, err := DetectExecutable(context.Background())
assert.NoError(t, err)
assert.Contains(t, py, "python3")
}
func TestDetectPythonCache(t *testing.T) {
pyExec = "abc"
py, err := detectExecutable(context.Background())
py, err := DetectExecutable(context.Background())
assert.NoError(t, err)
assert.Equal(t, "abc", py)
pyExec = ""
@ -82,7 +82,7 @@ func TestPyInline(t *testing.T) {
}
func TestPyInlineStderr(t *testing.T) {
detectExecutable(context.Background())
DetectExecutable(context.Background())
inline := "import sys; sys.stderr.write('___msg___'); sys.exit(1)"
_, err := PyInline(context.Background(), inline)
assert.EqualError(t, err, "___msg___")