mirror of https://github.com/databricks/cli.git
Raise an error when there are multiple local libraries with the same basename used (#2382)
## Changes It could happen that multiple artifacts defined in the bundle build — and therefore deploy — wheel packages with the same name. This leads to a conflict between these packages: they overwrite each other, so they should have different names instead. Fixes https://github.com/databricks/cli/issues/1674. The previous attempt (https://github.com/databricks/cli/pull/2297 + https://github.com/databricks/cli/pull/2341) led to breakage; this PR fixes both issues. ## Tests Added an acceptance test
This commit is contained in:
parent
bc299cafb8
commit
6a07e05e9b
|
@ -0,0 +1,54 @@
|
||||||
|
bundle:
|
||||||
|
name: same_name_libraries
|
||||||
|
|
||||||
|
variables:
|
||||||
|
cluster:
|
||||||
|
default:
|
||||||
|
spark_version: 15.4.x-scala2.12
|
||||||
|
node_type_id: i3.xlarge
|
||||||
|
data_security_mode: SINGLE_USER
|
||||||
|
num_workers: 0
|
||||||
|
spark_conf:
|
||||||
|
spark.master: "local[*, 4]"
|
||||||
|
spark.databricks.cluster.profile: singleNode
|
||||||
|
custom_tags:
|
||||||
|
ResourceClass: SingleNode
|
||||||
|
|
||||||
|
artifacts:
|
||||||
|
whl1:
|
||||||
|
type: whl
|
||||||
|
path: ./whl1
|
||||||
|
whl2:
|
||||||
|
type: whl
|
||||||
|
path: ./whl2
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: "test"
|
||||||
|
tasks:
|
||||||
|
- task_key: task1
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_default_python
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl1/dist/*.whl
|
||||||
|
- pypi:
|
||||||
|
package: test_package
|
||||||
|
- task_key: task2
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_default_python
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl2/dist/*.whl
|
||||||
|
- maven:
|
||||||
|
coordinates: org.apache.spark:spark-sql_2.12:3.1.1
|
||||||
|
- task_key: task3
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_default_python
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl1/dist/*.whl
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
>>> errcode [CLI] bundle deploy
|
||||||
|
Building whl1...
|
||||||
|
Building whl2...
|
||||||
|
Error: Duplicate local library names: my_default_python-0.0.1-py3-none-any.whl
|
||||||
|
at resources.jobs.test.tasks[0].libraries[0].whl
|
||||||
|
resources.jobs.test.tasks[1].libraries[0].whl
|
||||||
|
in databricks.yml:36:15
|
||||||
|
databricks.yml:45:15
|
||||||
|
|
||||||
|
Local library names must be unique but found libraries with the same name: whl1/dist/my_default_python-0.0.1-py3-none-any.whl, whl2/dist/my_default_python-0.0.1-py3-none-any.whl
|
||||||
|
|
||||||
|
|
||||||
|
Exit code: 1
|
|
@ -0,0 +1,2 @@
|
||||||
|
trace errcode $CLI bundle deploy
|
||||||
|
rm -rf whl1 whl2
|
|
@ -0,0 +1,3 @@
|
||||||
|
[[Repls]]
|
||||||
|
Old = '\\'
|
||||||
|
New = '/'
|
|
@ -0,0 +1,28 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("./src")
|
||||||
|
|
||||||
|
import my_default_python
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="my_default_python",
|
||||||
|
version=my_default_python.__version__,
|
||||||
|
url="https://databricks.com",
|
||||||
|
author="[USERNAME]",
|
||||||
|
description="wheel file based on my_default_python/src",
|
||||||
|
packages=find_packages(where="./src"),
|
||||||
|
package_dir={"": "src"},
|
||||||
|
entry_points={
|
||||||
|
"packages": [
|
||||||
|
"main=my_default_python.main:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=[
|
||||||
|
# Dependencies in case the output wheel file is used as a library dependency.
|
||||||
|
# For defining dependencies, when this package is used in Databricks, see:
|
||||||
|
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||||
|
"setuptools"
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1 @@
|
||||||
|
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
||||||
|
print("hello")
|
|
@ -0,0 +1,28 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("./src")
|
||||||
|
|
||||||
|
import my_default_python
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="my_default_python",
|
||||||
|
version=my_default_python.__version__,
|
||||||
|
url="https://databricks.com",
|
||||||
|
author="[USERNAME]",
|
||||||
|
description="wheel file based on my_default_python/src",
|
||||||
|
packages=find_packages(where="./src"),
|
||||||
|
package_dir={"": "src"},
|
||||||
|
entry_points={
|
||||||
|
"packages": [
|
||||||
|
"main=my_default_python.main:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=[
|
||||||
|
# Dependencies in case the output wheel file is used as a library dependency.
|
||||||
|
# For defining dependencies, when this package is used in Databricks, see:
|
||||||
|
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||||
|
"setuptools"
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1 @@
|
||||||
|
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
||||||
|
print("hello")
|
|
@ -0,0 +1,56 @@
|
||||||
|
bundle:
|
||||||
|
name: unique_name_libraries
|
||||||
|
|
||||||
|
variables:
|
||||||
|
cluster:
|
||||||
|
default:
|
||||||
|
spark_version: 15.4.x-scala2.12
|
||||||
|
node_type_id: i3.xlarge
|
||||||
|
data_security_mode: SINGLE_USER
|
||||||
|
num_workers: 0
|
||||||
|
spark_conf:
|
||||||
|
spark.master: "local[*, 4]"
|
||||||
|
spark.databricks.cluster.profile: singleNode
|
||||||
|
custom_tags:
|
||||||
|
ResourceClass: SingleNode
|
||||||
|
|
||||||
|
artifacts:
|
||||||
|
whl1:
|
||||||
|
type: whl
|
||||||
|
path: ./whl1
|
||||||
|
whl2:
|
||||||
|
type: whl
|
||||||
|
path: ./whl2
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: "test"
|
||||||
|
tasks:
|
||||||
|
- task_key: task1
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_package
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl1/dist/*.whl
|
||||||
|
- whl: cowsay
|
||||||
|
- pypi:
|
||||||
|
package: test_package
|
||||||
|
- task_key: task2
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_other_package
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl2/dist/*.whl
|
||||||
|
- whl: cowsay
|
||||||
|
- maven:
|
||||||
|
coordinates: org.apache.spark:spark-sql_2.12:3.1.1
|
||||||
|
- task_key: task3
|
||||||
|
new_cluster: ${var.cluster}
|
||||||
|
python_wheel_task:
|
||||||
|
entry_point: main
|
||||||
|
package_name: my_default_python
|
||||||
|
libraries:
|
||||||
|
- whl: ./whl1/dist/*.whl
|
|
@ -0,0 +1,10 @@
|
||||||
|
|
||||||
|
>>> errcode [CLI] bundle deploy
|
||||||
|
Building whl1...
|
||||||
|
Building whl2...
|
||||||
|
Uploading [package name]
|
||||||
|
Uploading [package name]
|
||||||
|
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/unique_name_libraries/default/files...
|
||||||
|
Deploying resources...
|
||||||
|
Updating deployment state...
|
||||||
|
Deployment complete!
|
|
@ -0,0 +1,2 @@
|
||||||
|
trace errcode $CLI bundle deploy
|
||||||
|
rm -rf whl1 whl2
|
|
@ -0,0 +1,6 @@
|
||||||
|
Cloud = false
|
||||||
|
|
||||||
|
# The order in which files are uploaded can be different, so we just replace the name
|
||||||
|
[[Repls]]
|
||||||
|
Old="Uploading .*-0.0.1-py3-none-any.whl..."
|
||||||
|
New="Uploading [package name]"
|
|
@ -0,0 +1,28 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("./src")
|
||||||
|
|
||||||
|
import my_package
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="my_package",
|
||||||
|
version=my_package.__version__,
|
||||||
|
url="https://databricks.com",
|
||||||
|
author="[USERNAME]",
|
||||||
|
description="wheel file based on my_package/src",
|
||||||
|
packages=find_packages(where="./src"),
|
||||||
|
package_dir={"": "src"},
|
||||||
|
entry_points={
|
||||||
|
"packages": [
|
||||||
|
"main=my_package.main:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=[
|
||||||
|
# Dependencies in case the output wheel file is used as a library dependency.
|
||||||
|
# For defining dependencies, when this package is used in Databricks, see:
|
||||||
|
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||||
|
"setuptools"
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1 @@
|
||||||
|
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
||||||
|
print("hello")
|
|
@ -0,0 +1,28 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("./src")
|
||||||
|
|
||||||
|
import my_other_package
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="my_other_package",
|
||||||
|
version=my_other_package.__version__,
|
||||||
|
url="https://databricks.com",
|
||||||
|
author="[USERNAME]",
|
||||||
|
description="wheel file based on my_other_package/src",
|
||||||
|
packages=find_packages(where="./src"),
|
||||||
|
package_dir={"": "src"},
|
||||||
|
entry_points={
|
||||||
|
"packages": [
|
||||||
|
"main=my_other_package.main:main",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
install_requires=[
|
||||||
|
# Dependencies in case the output wheel file is used as a library dependency.
|
||||||
|
# For defining dependencies, when this package is used in Databricks, see:
|
||||||
|
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||||
|
"setuptools"
|
||||||
|
],
|
||||||
|
)
|
|
@ -0,0 +1 @@
|
||||||
|
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
||||||
|
print("hello")
|
|
@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic
|
||||||
|
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
output = append(output, dyn.NewValue(map[string]dyn.Value{
|
output = append(output, dyn.NewValue(map[string]dyn.Value{
|
||||||
libType: dyn.V(match),
|
libType: dyn.NewValue(match, lib.Locations()),
|
||||||
}, lib.Locations()))
|
}, lib.Locations()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
	"context"
	"path/filepath"
	"sort"
	"strings"

	"github.com/databricks/cli/bundle"
	"github.com/databricks/cli/libs/diag"
	"github.com/databricks/cli/libs/dyn"
)
|
||||||
|
|
||||||
|
type checkForSameNameLibraries struct{}
|
||||||
|
|
||||||
|
var patterns = []dyn.Pattern{
|
||||||
|
taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
|
||||||
|
forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
|
||||||
|
envDepsPattern.Append(dyn.AnyIndex()),
|
||||||
|
}
|
||||||
|
|
||||||
|
type libData struct {
|
||||||
|
fullPath string
|
||||||
|
locations []dyn.Location
|
||||||
|
paths []dyn.Path
|
||||||
|
otherPaths []string
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
func (c checkForSameNameLibraries) Name() string {
|
||||||
|
return "CheckForSameNameLibraries"
|
||||||
|
}
|
||||||
|
|
||||||
|
func CheckForSameNameLibraries() bundle.Mutator {
|
||||||
|
return checkForSameNameLibraries{}
|
||||||
|
}
|
|
@ -177,7 +177,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand
|
||||||
deploy.StatePull(),
|
deploy.StatePull(),
|
||||||
mutator.ValidateGitDetails(),
|
mutator.ValidateGitDetails(),
|
||||||
artifacts.CleanUp(),
|
artifacts.CleanUp(),
|
||||||
|
// libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we
|
||||||
|
// know what are the actual library paths.
|
||||||
|
// libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this
|
||||||
|
// mutator is part of the deploy step rather than validate.
|
||||||
libraries.ExpandGlobReferences(),
|
libraries.ExpandGlobReferences(),
|
||||||
|
libraries.CheckForSameNameLibraries(),
|
||||||
libraries.Upload(),
|
libraries.Upload(),
|
||||||
trampoline.TransformWheelTask(),
|
trampoline.TransformWheelTask(),
|
||||||
files.Upload(outputHandler),
|
files.Upload(outputHandler),
|
||||||
|
|
Loading…
Reference in New Issue