From 39fc86e83b473384ec55c4fee4eae0ac0cd8bd3b Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 24 Jul 2024 11:13:49 +0200 Subject: [PATCH] Split artifact cleanup into prepare step before build (#1618) ## Changes Now prepare stage which does cleanup is execute once before every build, so artifacts built into the same folder are correctly kept Fixes workaround 2 from this issue #1602 ## Tests Added unit test --- bundle/artifacts/artifacts.go | 16 ++++++ bundle/artifacts/build.go | 19 ------- bundle/artifacts/prepare.go | 57 +++++++++++++++++++ bundle/artifacts/upload_test.go | 7 ++- bundle/artifacts/whl/build.go | 9 +-- bundle/artifacts/whl/prepare.go | 48 ++++++++++++++++ bundle/phases/build.go | 1 + .../python_wheel_multiple/.gitignore | 3 + .../python_wheel_multiple/bundle.yml | 25 ++++++++ .../my_test_code/setup.py | 15 +++++ .../my_test_code/setup2.py | 15 +++++ .../my_test_code/src/__init__.py | 2 + .../my_test_code/src/__main__.py | 16 ++++++ bundle/tests/python_wheel_test.go | 17 ++++++ 14 files changed, 223 insertions(+), 27 deletions(-) create mode 100644 bundle/artifacts/prepare.go create mode 100644 bundle/artifacts/whl/prepare.go create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/.gitignore create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/bundle.yml create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup.py create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup2.py create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__init__.py create mode 100644 bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go index 15565cd6..3060d08d 100644 --- a/bundle/artifacts/artifacts.go +++ b/bundle/artifacts/artifacts.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/artifacts/whl" "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" @@ -29,6 +30,10 @@ var buildMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactTy var uploadMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{} +var prepareMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ + config.ArtifactPythonWheel: whl.Prepare, +} + func getBuildMutator(t config.ArtifactType, name string) bundle.Mutator { mutatorFactory, ok := buildMutators[t] if !ok { @@ -47,6 +52,17 @@ func getUploadMutator(t config.ArtifactType, name string) bundle.Mutator { return mutatorFactory(name) } +func getPrepareMutator(t config.ArtifactType, name string) bundle.Mutator { + mutatorFactory, ok := prepareMutators[t] + if !ok { + mutatorFactory = func(_ string) bundle.Mutator { + return mutator.NoOp() + } + } + + return mutatorFactory(name) +} + // Basic Build defines a general build mutator which builds artifact based on artifact.BuildCommand type basicBuild struct { name string diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index c8c3bf67..47c2f24d 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -35,15 +35,6 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.Errorf("artifact doesn't exist: %s", m.name) } - // Check if source paths are absolute, if not, make them absolute - for k := range artifact.Files { - f := &artifact.Files[k] - if !filepath.IsAbs(f.Source) { - dirPath := filepath.Dir(artifact.ConfigFilePath) - f.Source = filepath.Join(dirPath, f.Source) - } - } - // Skip building if build command is not specified or infered if artifact.BuildCommand == "" { // If no build command was specified or infered and there is no @@ -58,16 +49,6 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return diags } - // If artifact path is not provided, use bundle root dir - if artifact.Path == "" { - artifact.Path = b.RootPath - } - - if !filepath.IsAbs(artifact.Path) { - dirPath := filepath.Dir(artifact.ConfigFilePath) - artifact.Path = filepath.Join(dirPath, artifact.Path) - } - diags := bundle.Apply(ctx, b, getBuildMutator(artifact.Type, m.name)) if diags.HasError() { return diags diff --git a/bundle/artifacts/prepare.go b/bundle/artifacts/prepare.go new file mode 100644 index 00000000..493e8f7a --- /dev/null +++ b/bundle/artifacts/prepare.go @@ -0,0 +1,57 @@ +package artifacts + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +func PrepareAll() bundle.Mutator { + return &all{ + name: "Prepare", + fn: prepareArtifactByName, + } +} + +type prepare struct { + name string +} + +func prepareArtifactByName(name string) (bundle.Mutator, error) { + return &prepare{name}, nil +} + +func (m *prepare) Name() string { + return fmt.Sprintf("artifacts.Prepare(%s)", m.name) +} + +func (m *prepare) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return diag.Errorf("artifact doesn't exist: %s", m.name) + } + + // Check if source paths are absolute, if not, make them absolute + for k := range artifact.Files { + f := &artifact.Files[k] + if !filepath.IsAbs(f.Source) { + dirPath := filepath.Dir(artifact.ConfigFilePath) + f.Source = filepath.Join(dirPath, f.Source) + } + } + + // If artifact path is not provided, use bundle root dir + if artifact.Path == "" { + artifact.Path = b.RootPath + } + + if !filepath.IsAbs(artifact.Path) { + dirPath := filepath.Dir(artifact.ConfigFilePath) + artifact.Path = filepath.Join(dirPath, artifact.Path) + } + + return bundle.Apply(ctx, b, getPrepareMutator(artifact.Type, m.name)) +} diff --git a/bundle/artifacts/upload_test.go b/bundle/artifacts/upload_test.go index cf08843a..a71610b0 100644 --- a/bundle/artifacts/upload_test.go +++ b/bundle/artifacts/upload_test.go @@ -63,7 +63,12 @@ func TestExpandGlobFilesSource(t *testing.T) { return &noop{} } - diags := bundle.Apply(context.Background(), b, bundle.Seq(bm, u)) + pm := &prepare{"test"} + prepareMutators[config.ArtifactType("custom")] = func(name string) bundle.Mutator { + return &noop{} + } + + diags := bundle.Apply(context.Background(), b, bundle.Seq(pm, bm, u)) require.NoError(t, diags.Error()) require.Equal(t, 2, len(b.Config.Artifacts["test"].Files)) diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go index 992ade29..18d4b8ed 100644 --- a/bundle/artifacts/whl/build.go +++ b/bundle/artifacts/whl/build.go @@ -3,7 +3,6 @@ package whl import ( "context" "fmt" - "os" "path/filepath" "github.com/databricks/cli/bundle" @@ -36,18 +35,14 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { cmdio.LogString(ctx, fmt.Sprintf("Building %s...", m.name)) - dir := artifact.Path - - distPath := filepath.Join(dir, "dist") - os.RemoveAll(distPath) - python.CleanupWheelFolder(dir) - out, err := artifact.Build(ctx) if err != nil { return diag.Errorf("build failed %s, error: %v, output: %s", m.name, err, out) } log.Infof(ctx, "Build succeeded") + dir := artifact.Path + distPath := filepath.Join(artifact.Path, "dist") wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") if len(wheels) == 0 { return diag.Errorf("cannot find built wheel in %s for package %s", dir, m.name) diff --git a/bundle/artifacts/whl/prepare.go b/bundle/artifacts/whl/prepare.go new file mode 100644 index 00000000..7284b11e --- /dev/null +++ b/bundle/artifacts/whl/prepare.go @@ -0,0 +1,48 @@ +package whl + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/python" +) + +type prepare struct { + name string +} + +func Prepare(name string) bundle.Mutator { + return &prepare{ + name: name, + } +} + +func (m *prepare) Name() string { + return fmt.Sprintf("artifacts.whl.Prepare(%s)", m.name) +} + +func (m *prepare) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return diag.Errorf("artifact doesn't exist: %s", m.name) + } + + dir := artifact.Path + + distPath := filepath.Join(dir, "dist") + + // If we have multiple artifacts con figured, prepare will be called multiple times + // The first time we will remove the folders, other times will be no-op. + err := os.RemoveAll(distPath) + if err != nil { + log.Infof(ctx, "Failed to remove dist folder: %v", err) + } + python.CleanupWheelFolder(dir) + + return nil +} diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 362d23be..3ddc6b18 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -16,6 +16,7 @@ func Build() bundle.Mutator { scripts.Execute(config.ScriptPreBuild), artifacts.DetectPackages(), artifacts.InferMissingProperties(), + artifacts.PrepareAll(), artifacts.BuildAll(), scripts.Execute(config.ScriptPostBuild), mutator.ResolveVariableReferences( diff --git a/bundle/tests/python_wheel/python_wheel_multiple/.gitignore b/bundle/tests/python_wheel/python_wheel_multiple/.gitignore new file mode 100644 index 00000000..f03e23bc --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info +.databricks diff --git a/bundle/tests/python_wheel/python_wheel_multiple/bundle.yml b/bundle/tests/python_wheel/python_wheel_multiple/bundle.yml new file mode 100644 index 00000000..6964c58a --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/bundle.yml @@ -0,0 +1,25 @@ +bundle: + name: python-wheel + +artifacts: + my_test_code: + type: whl + path: "./my_test_code" + build: "python3 setup.py bdist_wheel" + my_test_code_2: + type: whl + path: "./my_test_code" + build: "python3 setup2.py bdist_wheel" + +resources: + jobs: + test_job: + name: "[${bundle.environment}] My Wheel Job" + tasks: + - task_key: TestTask + existing_cluster_id: "0717-132531-5opeqon1" + python_wheel_task: + package_name: "my_test_code" + entry_point: "run" + libraries: + - whl: ./my_test_code/dist/*.whl diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup.py new file mode 100644 index 00000000..0bd871dd --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup, find_packages + +import src + +setup( + name="my_test_code", + version=src.__version__, + author=src.__author__, + url="https://databricks.com", + author_email="john.doe@databricks.com", + description="my test wheel", + packages=find_packages(include=["src"]), + entry_points={"group_1": "run=src.__main__:main"}, + install_requires=["setuptools"], +) diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup2.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup2.py new file mode 100644 index 00000000..424bec9f --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/setup2.py @@ -0,0 +1,15 @@ +from setuptools import setup, find_packages + +import src + +setup( + name="my_test_code_2", + version=src.__version__, + author=src.__author__, + url="https://databricks.com", + author_email="john.doe@databricks.com", + description="my test wheel", + packages=find_packages(include=["src"]), + entry_points={"group_1": "run=src.__main__:main"}, + install_requires=["setuptools"], +) diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__init__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__init__.py new file mode 100644 index 00000000..909f1f32 --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__init__.py @@ -0,0 +1,2 @@ +__version__ = "0.0.1" +__author__ = "Databricks" diff --git a/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py new file mode 100644 index 00000000..73d045af --- /dev/null +++ b/bundle/tests/python_wheel/python_wheel_multiple/my_test_code/src/__main__.py @@ -0,0 +1,16 @@ +""" +The entry point of the Python Wheel +""" + +import sys + + +def main(): + # This method will print the provided arguments + print('Hello from my func') + print('Got arguments:') + print(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/bundle/tests/python_wheel_test.go b/bundle/tests/python_wheel_test.go index 8d0036a7..52b3d6e0 100644 --- a/bundle/tests/python_wheel_test.go +++ b/bundle/tests/python_wheel_test.go @@ -96,3 +96,20 @@ func TestPythonWheelBuildWithEnvironmentKey(t *testing.T) { diags = bundle.Apply(ctx, b, match) require.NoError(t, diags.Error()) } + +func TestPythonWheelBuildMultiple(t *testing.T) { + ctx := context.Background() + b, err := bundle.Load(ctx, "./python_wheel/python_wheel_multiple") + require.NoError(t, err) + + diags := bundle.Apply(ctx, b, bundle.Seq(phases.Load(), phases.Build())) + require.NoError(t, diags.Error()) + + matches, err := filepath.Glob("./python_wheel/python_wheel_multiple/my_test_code/dist/my_test_code*.whl") + require.NoError(t, err) + require.Equal(t, 2, len(matches)) + + match := libraries.ValidateLocalLibrariesExist() + diags = bundle.Apply(ctx, b, match) + require.NoError(t, diags.Error()) +}