From 14f0292598e84009e786a9606ae5d0d4b99dcb6b Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Tue, 25 Feb 2025 14:10:25 +0100 Subject: [PATCH] Simplify whl artifact autodetection code (#2371) ## Changes - Get rid of artifacts.DetectPackages which is a thin wrapper around artifacts/whl.DetectPackage - Get rid of parsing name out of setup.py. Do not randomize either, use a static one. ## Tests Existing tests. --- bundle/artifacts/autodetect.go | 32 ---------------- bundle/artifacts/whl/autodetect.go | 38 +++++-------------- bundle/artifacts/whl/autodetect_test.go | 22 ----------- bundle/phases/build.go | 3 +- .../testdata/default_python/bundle_deploy.txt | 2 +- 5 files changed, 13 insertions(+), 84 deletions(-) delete mode 100644 bundle/artifacts/autodetect.go delete mode 100644 bundle/artifacts/whl/autodetect_test.go diff --git a/bundle/artifacts/autodetect.go b/bundle/artifacts/autodetect.go deleted file mode 100644 index c8d235616..000000000 --- a/bundle/artifacts/autodetect.go +++ /dev/null @@ -1,32 +0,0 @@ -package artifacts - -import ( - "context" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/whl" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/log" -) - -func DetectPackages() bundle.Mutator { - return &autodetect{} -} - -type autodetect struct{} - -func (m *autodetect) Name() string { - return "artifacts.DetectPackages" -} - -func (m *autodetect) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - // If artifacts section explicitly defined, do not try to auto detect packages - if b.Config.Artifacts != nil { - log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") - return nil - } - - return bundle.Apply(ctx, b, bundle.Seq( - whl.DetectPackage(), - )) -} diff --git a/bundle/artifacts/whl/autodetect.go b/bundle/artifacts/whl/autodetect.go index 202ea12bc..9eead83b7 100644 --- a/bundle/artifacts/whl/autodetect.go +++ b/bundle/artifacts/whl/autodetect.go @@ -2,11 +2,8 @@ package whl import ( "context" - "fmt" "os" "path/filepath" - "regexp" - "time" "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" @@ -26,11 +23,17 @@ func (m *detectPkg) Name() string { } func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if b.Config.Artifacts != nil { + log.Debugf(ctx, "artifacts block is defined, skipping auto-detecting") + return nil + } + tasks := libraries.FindTasksWithLocalLibraries(b) if len(tasks) == 0 { log.Infof(ctx, "No local tasks in databricks.yml config, skipping auto detect") return nil } + log.Infof(ctx, "Detecting Python wheel project...") // checking if there is setup.py in the bundle root @@ -42,39 +45,18 @@ func (m *detectPkg) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Found Python wheel project at %s", b.BundleRootPath) - module := extractModuleName(setupPy) - - if b.Config.Artifacts == nil { - b.Config.Artifacts = make(map[string]*config.Artifact) - } pkgPath, err := filepath.Abs(b.BundleRootPath) if err != nil { return diag.FromErr(err) } - b.Config.Artifacts[module] = &config.Artifact{ + + b.Config.Artifacts = make(map[string]*config.Artifact) + b.Config.Artifacts["python_artifact"] = &config.Artifact{ Path: pkgPath, Type: config.ArtifactPythonWheel, + // BuildCommand will be set by bundle/artifacts/whl/infer.go to "python3 setup.py bdist_wheel" } return nil } - -func extractModuleName(setupPy string) string { - bytes, err := os.ReadFile(setupPy) - if err != nil { - return randomName() - } - - content := string(bytes) - r := regexp.MustCompile(`name=['"](.*)['"]`) - matches := r.FindStringSubmatch(content) - if len(matches) == 0 { - return randomName() - } - return matches[1] -} - -func randomName() string { - return fmt.Sprintf("artifact%d", time.Now().Unix()) -} diff --git a/bundle/artifacts/whl/autodetect_test.go b/bundle/artifacts/whl/autodetect_test.go deleted file mode 100644 index b53289b2a..000000000 --- a/bundle/artifacts/whl/autodetect_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package whl - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestExtractModuleName(t *testing.T) { - moduleName := extractModuleName("./testdata/setup.py") - assert.Equal(t, "my_test_code", moduleName) -} - -func TestExtractModuleNameMinimal(t *testing.T) { - moduleName := extractModuleName("./testdata/setup_minimal.py") - assert.Equal(t, "my_test_code", moduleName) -} - -func TestExtractModuleNameIncorrect(t *testing.T) { - moduleName := extractModuleName("./testdata/setup_incorrect.py") - assert.Contains(t, moduleName, "artifact") -} diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 3ddc6b181..cc35983ec 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -3,6 +3,7 @@ package phases import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts" + "github.com/databricks/cli/bundle/artifacts/whl" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/scripts" @@ -14,7 +15,7 @@ func Build() bundle.Mutator { "build", []bundle.Mutator{ scripts.Execute(config.ScriptPreBuild), - artifacts.DetectPackages(), + whl.DetectPackage(), artifacts.InferMissingProperties(), artifacts.PrepareAll(), artifacts.BuildAll(), diff --git a/integration/bundle/testdata/default_python/bundle_deploy.txt b/integration/bundle/testdata/default_python/bundle_deploy.txt index 076e7618f..fe1cc4fac 100644 --- a/integration/bundle/testdata/default_python/bundle_deploy.txt +++ b/integration/bundle/testdata/default_python/bundle_deploy.txt @@ -1,4 +1,4 @@ -Building project_name_$UNIQUE_PRJ... +Building python_artifact... Uploading project_name_$UNIQUE_PRJ-0.0.1+[NUMID].[NUMID]-py3-none-any.whl... Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/project_name_$UNIQUE_PRJ/dev/files... Deploying resources...