From 6a07e05e9b6ea7b8a80b2eb7afddff10c620dcc9 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 27 Feb 2025 16:32:50 +0000 Subject: [PATCH] Raise an error when there are multiple local libraries with the same basename used (#2382) ## Changes It could happen that there are multiple artifacts defined in the bundle which build and therefore deploy wheel packages with the same name. This leads to conflict between these packages, they will overwrite each other and therefore they should have different names instead Fixes https://github.com/databricks/cli/issues/1674 Previous attempt (https://github.com/databricks/cli/pull/2297 + https://github.com/databricks/cli/pull/2341) led to the breakage, this PR fixes both issues. ## Tests Added acceptance test --- .../same_name_libraries/databricks.yml | 54 +++++++++ .../artifacts/same_name_libraries/output.txt | 14 +++ .../artifacts/same_name_libraries/script | 2 + .../artifacts/same_name_libraries/test.toml | 3 + .../same_name_libraries/whl1/setup.py | 28 +++++ .../whl1/src/my_default_python/__init__.py | 1 + .../whl1/src/my_default_python/main.py | 1 + .../same_name_libraries/whl2/setup.py | 28 +++++ .../whl2/src/my_default_python/__init__.py | 1 + .../whl2/src/my_default_python/main.py | 1 + .../unique_name_libraries/databricks.yml | 56 ++++++++++ .../unique_name_libraries/output.txt | 10 ++ .../artifacts/unique_name_libraries/script | 2 + .../artifacts/unique_name_libraries/test.toml | 6 + .../unique_name_libraries/whl1/setup.py | 28 +++++ .../whl1/src/my_package/__init__.py | 1 + .../whl1/src/my_package/main.py | 1 + .../unique_name_libraries/whl2/setup.py | 28 +++++ .../whl2/src/my_other_package/__init__.py | 1 + .../whl2/src/my_other_package/main.py | 1 + bundle/libraries/expand_glob_references.go | 2 +- bundle/libraries/same_name_libraries.go | 104 ++++++++++++++++++ bundle/phases/deploy.go | 5 + 23 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 acceptance/bundle/artifacts/same_name_libraries/databricks.yml create mode 100644 acceptance/bundle/artifacts/same_name_libraries/output.txt create mode 100644 acceptance/bundle/artifacts/same_name_libraries/script create mode 100644 acceptance/bundle/artifacts/same_name_libraries/test.toml create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py create mode 100644 acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/databricks.yml create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/output.txt create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/script create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/test.toml create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py create mode 100644 acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py create mode 100644 bundle/libraries/same_name_libraries.go diff --git a/acceptance/bundle/artifacts/same_name_libraries/databricks.yml b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml new file mode 100644 index 000000000..d58674a64 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml @@ -0,0 +1,54 @@ +bundle: + name: same_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl + - pypi: + package: test_package + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl2/dist/*.whl + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/same_name_libraries/output.txt b/acceptance/bundle/artifacts/same_name_libraries/output.txt new file mode 100644 index 000000000..ee6c9d566 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/output.txt @@ -0,0 +1,14 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Error: Duplicate local library names: my_default_python-0.0.1-py3-none-any.whl + at resources.jobs.test.tasks[0].libraries[0].whl + resources.jobs.test.tasks[1].libraries[0].whl + in databricks.yml:36:15 + databricks.yml:45:15 + +Local library names must be unique but found libraries with the same name: whl1/dist/my_default_python-0.0.1-py3-none-any.whl, whl2/dist/my_default_python-0.0.1-py3-none-any.whl + + +Exit code: 1 diff --git a/acceptance/bundle/artifacts/same_name_libraries/script b/acceptance/bundle/artifacts/same_name_libraries/script new file mode 100644 index 000000000..6c899df07 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/same_name_libraries/test.toml b/acceptance/bundle/artifacts/same_name_libraries/test.toml new file mode 100644 index 000000000..a298217f2 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/test.toml @@ -0,0 +1,3 @@ +[[Repls]] +Old = '\\' +New = '/' diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py new file mode 100644 index 000000000..ddd81295e --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py new file mode 100644 index 000000000..ddd81295e --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml new file mode 100644 index 000000000..dd13c1918 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml @@ -0,0 +1,56 @@ +bundle: + name: unique_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_package + libraries: + - whl: ./whl1/dist/*.whl + - whl: cowsay + - pypi: + package: test_package + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_other_package + libraries: + - whl: ./whl2/dist/*.whl + - whl: cowsay + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/unique_name_libraries/output.txt b/acceptance/bundle/artifacts/unique_name_libraries/output.txt new file mode 100644 index 000000000..ecc7bf57b --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/output.txt @@ -0,0 +1,10 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Uploading [package name] +Uploading [package name] +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/unique_name_libraries/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/artifacts/unique_name_libraries/script b/acceptance/bundle/artifacts/unique_name_libraries/script new file mode 100644 index 000000000..6c899df07 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml new file mode 100644 index 000000000..280338bd6 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -0,0 +1,6 @@ +Cloud = false + +# The order in which files are uploaded can be different, so we just replace the name +[[Repls]] +Old="Uploading .*-0.0.1-py3-none-any.whl..." +New="Uploading [package name]" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py new file mode 100644 index 000000000..ca85def32 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_package + +setup( + name="my_package", + version=my_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py new file mode 100644 index 000000000..5e5b34788 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_other_package + +setup( + name="my_other_package", + version=my_other_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_other_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_other_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index bb1905045..7a808f627 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic for _, match := range matches { output = append(output, dyn.NewValue(map[string]dyn.Value{ - libType: dyn.V(match), + libType: dyn.NewValue(match, lib.Locations()), }, lib.Locations())) } } diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go new file mode 100644 index 000000000..ab869d3d2 --- /dev/null +++ b/bundle/libraries/same_name_libraries.go @@ -0,0 +1,104 @@ +package libraries + +import ( + "context" + "path/filepath" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type checkForSameNameLibraries struct{} + +var patterns = []dyn.Pattern{ + taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + envDepsPattern.Append(dyn.AnyIndex()), +} + +type libData struct { + fullPath string + locations []dyn.Location + paths []dyn.Path + otherPaths []string +} + +func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + libs := make(map[string]*libData) + + err := b.Config.Mutate(func(rootConfig dyn.Value) (dyn.Value, error) { + var err error + for _, pattern := range patterns { + rootConfig, err = dyn.MapByPattern(rootConfig, pattern, func(p dyn.Path, libraryValue dyn.Value) (dyn.Value, error) { + libPath, ok := libraryValue.AsString() + if !ok { + return libraryValue, nil + } + + // If not local library, skip the check + if !IsLibraryLocal(libPath) { + return libraryValue, nil + } + + lib := filepath.Base(libPath) + // If the same basename was seen already but full path is different + // then it's a duplicate. Add the location to the location list. + lp, ok := libs[lib] + if !ok { + libs[lib] = &libData{ + fullPath: libPath, + locations: []dyn.Location{libraryValue.Location()}, + paths: []dyn.Path{p}, + otherPaths: []string{}, + } + } else if lp.fullPath != libPath { + lp.locations = append(lp.locations, libraryValue.Location()) + lp.paths = append(lp.paths, p) + lp.otherPaths = append(lp.otherPaths, libPath) + } + + return libraryValue, nil + }) + if err != nil { + return dyn.InvalidValue, err + } + } + + if err != nil { + return dyn.InvalidValue, err + } + + return rootConfig, nil + }) + + // Iterate over all the libraries and check if there are any duplicates. + // Duplicates will have more than one location. + // If there are duplicates, add a diagnostic. + for lib, lv := range libs { + if len(lv.locations) > 1 { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Duplicate local library names: " + lib, + Detail: "Local library names must be unique but found libraries with the same name: " + lv.fullPath + ", " + strings.Join(lv.otherPaths, ", "), + Locations: lv.locations, + Paths: lv.paths, + }) + } + } + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + + return diags +} + +func (c checkForSameNameLibraries) Name() string { + return "CheckForSameNameLibraries" +} + +func CheckForSameNameLibraries() bundle.Mutator { + return checkForSameNameLibraries{} +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index b31ed682f..02e0e9bd8 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -177,7 +177,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand deploy.StatePull(), mutator.ValidateGitDetails(), artifacts.CleanUp(), + // libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we + // know what are the actual library paths. + // libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this + // mutator is part of the deploy step rather than validate. libraries.ExpandGlobReferences(), + libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), files.Upload(outputHandler),