Raise an error when multiple local libraries with the same basename are used (#2382)

## Changes
A bundle can define multiple artifacts that build, and therefore deploy,
wheel packages with the same file name. These packages conflict with each
other: on upload they overwrite one another, so they must have distinct
names instead (see the sketch below).

Fixes https://github.com/databricks/cli/issues/1674

A previous attempt (https://github.com/databricks/cli/pull/2297 +
https://github.com/databricks/cli/pull/2341) led to a breakage; this
PR fixes both issues.
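
The sketch below illustrates the failure mode. It assumes, as the Upload mutator in this repository does, that local wheels land in a shared artifact directory keyed by the wheel's basename; the workspace path and the `.internal` directory name here are illustrative assumptions, not taken from this PR:

```go
package main

import (
	"fmt"
	"path"
	"path/filepath"
)

func main() {
	// Two artifacts build wheels from different source directories...
	whl1 := "whl1/dist/my_default_python-0.0.1-py3-none-any.whl"
	whl2 := "whl2/dist/my_default_python-0.0.1-py3-none-any.whl"

	// ...but the upload target is derived from the basename alone, so both
	// wheels map to the same remote path and the second upload silently
	// overwrites the first. The directory layout is an assumption made for
	// this illustration.
	remote := func(p string) string {
		return path.Join("/Workspace/Users/someone/.bundle/artifacts/.internal", filepath.Base(p))
	}
	fmt.Println(remote(whl1) == remote(whl2)) // true: one wheel clobbers the other
}
```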

## Tests
Added acceptance tests

@@ -0,0 +1,54 @@
bundle:
  name: same_name_libraries

variables:
  cluster:
    default:
      spark_version: 15.4.x-scala2.12
      node_type_id: i3.xlarge
      data_security_mode: SINGLE_USER
      num_workers: 0
      spark_conf:
        spark.master: "local[*, 4]"
        spark.databricks.cluster.profile: singleNode
      custom_tags:
        ResourceClass: SingleNode

artifacts:
  whl1:
    type: whl
    path: ./whl1
  whl2:
    type: whl
    path: ./whl2

resources:
  jobs:
    test:
      name: "test"
      tasks:
        - task_key: task1
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl1/dist/*.whl
            - pypi:
                package: test_package
        - task_key: task2
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl2/dist/*.whl
            - maven:
                coordinates: org.apache.spark:spark-sql_2.12:3.1.1
        - task_key: task3
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl1/dist/*.whl

@@ -0,0 +1,14 @@

>>> errcode [CLI] bundle deploy
Building whl1...
Building whl2...
Error: Duplicate local library names: my_default_python-0.0.1-py3-none-any.whl
  at resources.jobs.test.tasks[0].libraries[0].whl
     resources.jobs.test.tasks[1].libraries[0].whl
  in databricks.yml:36:15
     databricks.yml:45:15

Local library names must be unique but found libraries with the same name: whl1/dist/my_default_python-0.0.1-py3-none-any.whl, whl2/dist/my_default_python-0.0.1-py3-none-any.whl

Exit code: 1

@@ -0,0 +1,2 @@
trace errcode $CLI bundle deploy
rm -rf whl1 whl2

@@ -0,0 +1,3 @@
[[Repls]]
Old = '\\'
New = '/'

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_default_python

setup(
    name="my_default_python",
    version=my_default_python.__version__,
    url="https://databricks.com",
    author="[USERNAME]",
    description="wheel file based on my_default_python/src",
    packages=find_packages(where="./src"),
    package_dir={"": "src"},
    entry_points={
        "packages": [
            "main=my_default_python.main:main",
        ],
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)

@@ -0,0 +1 @@
__version__ = "0.0.1"

@@ -0,0 +1 @@
print("hello")

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_default_python

setup(
    name="my_default_python",
    version=my_default_python.__version__,
    url="https://databricks.com",
    author="[USERNAME]",
    description="wheel file based on my_default_python/src",
    packages=find_packages(where="./src"),
    package_dir={"": "src"},
    entry_points={
        "packages": [
            "main=my_default_python.main:main",
        ],
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)

@@ -0,0 +1 @@
__version__ = "0.0.1"

@@ -0,0 +1 @@
print("hello")

@@ -0,0 +1,56 @@
bundle:
  name: unique_name_libraries

variables:
  cluster:
    default:
      spark_version: 15.4.x-scala2.12
      node_type_id: i3.xlarge
      data_security_mode: SINGLE_USER
      num_workers: 0
      spark_conf:
        spark.master: "local[*, 4]"
        spark.databricks.cluster.profile: singleNode
      custom_tags:
        ResourceClass: SingleNode

artifacts:
  whl1:
    type: whl
    path: ./whl1
  whl2:
    type: whl
    path: ./whl2

resources:
  jobs:
    test:
      name: "test"
      tasks:
        - task_key: task1
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_package
          libraries:
            - whl: ./whl1/dist/*.whl
            - whl: cowsay
            - pypi:
                package: test_package
        - task_key: task2
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_other_package
          libraries:
            - whl: ./whl2/dist/*.whl
            - whl: cowsay
            - maven:
                coordinates: org.apache.spark:spark-sql_2.12:3.1.1
        - task_key: task3
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl1/dist/*.whl
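
Note that both task1 and task2 above reference the same non-local library (`cowsay`): the check skips anything that is not a local path (see the `IsLibraryLocal` guard in the mutator below), so identical remote names are not flagged.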

@@ -0,0 +1,10 @@

>>> errcode [CLI] bundle deploy
Building whl1...
Building whl2...
Uploading [package name]
Uploading [package name]
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/unique_name_libraries/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

@@ -0,0 +1,2 @@
trace errcode $CLI bundle deploy
rm -rf whl1 whl2

@@ -0,0 +1,6 @@
Cloud = false

# The order in which files are uploaded can be different, so we just replace the name
[[Repls]]
Old="Uploading .*-0.0.1-py3-none-any.whl..."
New="Uploading [package name]"

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_package

setup(
    name="my_package",
    version=my_package.__version__,
    url="https://databricks.com",
    author="[USERNAME]",
    description="wheel file based on my_package/src",
    packages=find_packages(where="./src"),
    package_dir={"": "src"},
    entry_points={
        "packages": [
            "main=my_package.main:main",
        ],
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)

@@ -0,0 +1 @@
__version__ = "0.0.1"

@@ -0,0 +1 @@
print("hello")

@@ -0,0 +1,28 @@
from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_other_package

setup(
    name="my_other_package",
    version=my_other_package.__version__,
    url="https://databricks.com",
    author="[USERNAME]",
    description="wheel file based on my_other_package/src",
    packages=find_packages(where="./src"),
    package_dir={"": "src"},
    entry_points={
        "packages": [
            "main=my_other_package.main:main",
        ],
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)

@@ -0,0 +1 @@
__version__ = "0.0.1"

@@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic
 	for _, match := range matches {
 		output = append(output, dyn.NewValue(map[string]dyn.Value{
-			libType: dyn.V(match),
+			libType: dyn.NewValue(match, lib.Locations()),
 		}, lib.Locations()))
 	}
 }
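
The one-line change above swaps `dyn.V`, which creates a value without source locations, for `dyn.NewValue`, which attaches the original library entry's locations to every expanded glob match. A minimal sketch of the difference, using the `libs/dyn` API as it appears in this hunk (the location literal is illustrative):

```go
package main

import (
	"fmt"

	"github.com/databricks/cli/libs/dyn"
)

func main() {
	// An illustrative location, as if this value came from databricks.yml.
	loc := []dyn.Location{{File: "databricks.yml", Line: 36, Column: 15}}

	// dyn.V drops source information: a diagnostic attached to this value
	// cannot point back into the original config file.
	v1 := dyn.V("whl1/dist/my_default_python-0.0.1-py3-none-any.whl")

	// dyn.NewValue carries the locations along, which is what lets the new
	// duplicate check report an exact file/line/column per glob match.
	v2 := dyn.NewValue("whl1/dist/my_default_python-0.0.1-py3-none-any.whl", loc)

	fmt.Println(v1.Location()) // zero-value location: no source information
	fmt.Println(v2.Location()) // databricks.yml:36:15
}
```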

@@ -0,0 +1,104 @@
package libraries

import (
	"context"
	"path/filepath"
	"strings"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/diag"
	"github.com/databricks/cli/libs/dyn"
)

type checkForSameNameLibraries struct{}

var patterns = []dyn.Pattern{
	taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
	forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
	envDepsPattern.Append(dyn.AnyIndex()),
}

type libData struct {
	fullPath   string
	locations  []dyn.Location
	paths      []dyn.Path
	otherPaths []string
}

func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
	var diags diag.Diagnostics
	libs := make(map[string]*libData)

	err := b.Config.Mutate(func(rootConfig dyn.Value) (dyn.Value, error) {
		var err error
		for _, pattern := range patterns {
			rootConfig, err = dyn.MapByPattern(rootConfig, pattern, func(p dyn.Path, libraryValue dyn.Value) (dyn.Value, error) {
				libPath, ok := libraryValue.AsString()
				if !ok {
					return libraryValue, nil
				}

				// If this is not a local library, skip the check.
				if !IsLibraryLocal(libPath) {
					return libraryValue, nil
				}

				lib := filepath.Base(libPath)
				// If the same basename was seen already but the full path differs,
				// it is a duplicate. Add the location to the location list.
				lp, ok := libs[lib]
				if !ok {
					libs[lib] = &libData{
						fullPath:   libPath,
						locations:  []dyn.Location{libraryValue.Location()},
						paths:      []dyn.Path{p},
						otherPaths: []string{},
					}
				} else if lp.fullPath != libPath {
					lp.locations = append(lp.locations, libraryValue.Location())
					lp.paths = append(lp.paths, p)
					lp.otherPaths = append(lp.otherPaths, libPath)
				}

				return libraryValue, nil
			})
			if err != nil {
				return dyn.InvalidValue, err
			}
		}
		if err != nil {
			return dyn.InvalidValue, err
		}

		return rootConfig, nil
	})

	// Iterate over all the libraries and check if there are any duplicates.
	// Duplicates will have more than one location.
	// If there are duplicates, add a diagnostic.
	for lib, lv := range libs {
		if len(lv.locations) > 1 {
			diags = append(diags, diag.Diagnostic{
				Severity:  diag.Error,
				Summary:   "Duplicate local library names: " + lib,
				Detail:    "Local library names must be unique but found libraries with the same name: " + lv.fullPath + ", " + strings.Join(lv.otherPaths, ", "),
				Locations: lv.locations,
				Paths:     lv.paths,
			})
		}
	}
	if err != nil {
		diags = diags.Extend(diag.FromErr(err))
	}

	return diags
}

func (c checkForSameNameLibraries) Name() string {
	return "CheckForSameNameLibraries"
}

func CheckForSameNameLibraries() bundle.Mutator {
	return checkForSameNameLibraries{}
}
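
To make the mutator's core easier to follow, here is the duplicate-detection idea reduced to a dependency-free sketch; the paths are taken from the acceptance test above, while the real implementation additionally records `dyn` locations and paths so the diagnostic can point into databricks.yml:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Library paths after glob expansion (which is why the check runs after
	// ExpandGlobReferences: globs like ./whl1/dist/*.whl must already be
	// resolved to concrete files).
	libs := []string{
		"whl1/dist/my_default_python-0.0.1-py3-none-any.whl",
		"whl2/dist/my_default_python-0.0.1-py3-none-any.whl",
		"whl1/dist/my_default_python-0.0.1-py3-none-any.whl", // same full path: not a duplicate
	}

	seen := map[string]string{} // basename -> first full path seen
	for _, lib := range libs {
		base := filepath.Base(lib)
		first, ok := seen[base]
		if !ok {
			seen[base] = lib
			continue
		}
		// The same basename with a different full path is the error case; the
		// same path referenced twice (e.g. task3 in the test bundle) is fine.
		if first != lib {
			fmt.Printf("Duplicate local library names: %s (%s, %s)\n", base, first, lib)
		}
	}
}
```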

@@ -177,7 +177,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand
 		deploy.StatePull(),
 		mutator.ValidateGitDetails(),
 		artifacts.CleanUp(),
+		// libraries.CheckForSameNameLibraries() needs to run after we expand glob references so we
+		// know what the actual library paths are.
+		// libraries.ExpandGlobReferences() has to run after the libraries are built, which is why this
+		// mutator is part of the deploy step rather than validate.
 		libraries.ExpandGlobReferences(),
+		libraries.CheckForSameNameLibraries(),
 		libraries.Upload(),
 		trampoline.TransformWheelTask(),
 		files.Upload(outputHandler),