Raise an error when there are multiple local libraries with the same basename used (#2297)

## Changes
Raise an error when there are multiple local libraries with the same
basename used

Fixes #1674

## Tests
Added an unit test
This commit is contained in:
Andrew Nester 2025-02-07 17:55:16 +00:00 committed by GitHub
parent 6d83ffd109
commit 2a97dcaa45
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 366 additions and 1 deletions

View File

@ -0,0 +1,50 @@
bundle:
name: same_name_libraries
variables:
cluster:
default:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
num_workers: 0
spark_conf:
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
custom_tags:
ResourceClass: SingleNode
artifacts:
whl1:
type: whl
path: ./whl1
whl2:
type: whl
path: ./whl2
resources:
jobs:
test:
name: "test"
tasks:
- task_key: task1
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl1/dist/*.whl
- task_key: task2
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl2/dist/*.whl
- task_key: task3
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl1/dist/*.whl

View File

@ -0,0 +1,14 @@
>>> errcode [CLI] bundle deploy
Building whl1...
Building whl2...
Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl
at resources.jobs.test.tasks[0].libraries[0].whl
resources.jobs.test.tasks[1].libraries[0].whl
in databricks.yml:36:15
databricks.yml:43:15
Local library names must be unique
Exit code: 1

View File

@ -0,0 +1,2 @@
trace errcode $CLI bundle deploy
rm -rf whl1 whl2

View File

@ -0,0 +1,36 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import my_default_python
setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)

View File

@ -0,0 +1 @@
__version__ = "0.0.1"

View File

@ -0,0 +1 @@
print("hello")

View File

@ -0,0 +1,36 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import my_default_python
setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)

View File

@ -0,0 +1 @@
__version__ = "0.0.1"

View File

@ -0,0 +1 @@
print("hello")

View File

@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic
for _, match := range matches {
output = append(output, dyn.NewValue(map[string]dyn.Value{
libType: dyn.V(match),
libType: dyn.NewValue(match, lib.Locations()),
}, lib.Locations()))
}
}

View File

@ -0,0 +1,97 @@
package libraries
import (
"context"
"path/filepath"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)
type checkForSameNameLibraries struct{}
var patterns = []dyn.Pattern{
taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
envDepsPattern.Append(dyn.AnyIndex()),
}
type libData struct {
fullPath string
locations []dyn.Location
paths []dyn.Path
}
func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
var diags diag.Diagnostics
libs := make(map[string]*libData)
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
var err error
for _, pattern := range patterns {
v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) {
libPath := lv.MustString()
// If not local library, skip the check
if !IsLibraryLocal(libPath) {
return lv, nil
}
libFullPath := lv.MustString()
lib := filepath.Base(libFullPath)
// If the same basename was seen already but full path is different
// then it's a duplicate. Add the location to the location list.
lp, ok := libs[lib]
if !ok {
libs[lib] = &libData{
fullPath: libFullPath,
locations: []dyn.Location{lv.Location()},
paths: []dyn.Path{p},
}
} else if lp.fullPath != libFullPath {
lp.locations = append(lp.locations, lv.Location())
lp.paths = append(lp.paths, p)
}
return lv, nil
})
if err != nil {
return dyn.InvalidValue, err
}
}
if err != nil {
return dyn.InvalidValue, err
}
return v, nil
})
// Iterate over all the libraries and check if there are any duplicates.
// Duplicates will have more than one location.
// If there are duplicates, add a diagnostic.
for lib, lv := range libs {
if len(lv.locations) > 1 {
diags = append(diags, diag.Diagnostic{
Severity: diag.Error,
Summary: "Duplicate local library name " + lib,
Detail: "Local library names must be unique",
Locations: lv.locations,
Paths: lv.paths,
})
}
}
if err != nil {
diags = diags.Extend(diag.FromErr(err))
}
return diags
}
func (c checkForSameNameLibraries) Name() string {
return "CheckForSameNameLibraries"
}
func CheckForSameNameLibraries() bundle.Mutator {
return checkForSameNameLibraries{}
}

View File

@ -0,0 +1,121 @@
package libraries
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/internal/bundletest"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)
func TestSameNameLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test.whl",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test.whl",
},
},
},
},
},
},
},
},
},
}
bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
})
bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{
{File: "databricks.yml", Line: 20, Column: 1},
})
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Len(t, diags, 1)
require.Equal(t, diag.Error, diags[0].Severity)
require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary)
require.Equal(t, []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
{File: "databricks.yml", Line: 20, Column: 1},
}, diags[0].Locations)
paths := make([]string, 0)
for _, p := range diags[0].Paths {
paths = append(paths, p.String())
}
require.Equal(t, []string{
"resources.jobs.test.tasks[0].libraries[0].whl",
"resources.jobs.test.tasks[1].libraries[0].whl",
}, paths)
}
func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl",
},
{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test-0.1.0.whl",
},
{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task
},
},
},
},
},
},
},
},
},
}
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Empty(t, diags)
}

View File

@ -155,6 +155,11 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator {
mutator.ValidateGitDetails(),
artifacts.CleanUp(),
libraries.ExpandGlobReferences(),
// libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we
// know what are the actual library paths.
// libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this
// mutator is part of the deploy step rather than validate.
libraries.CheckForSameNameLibraries(),
libraries.Upload(),
trampoline.TransformWheelTask(),
files.Upload(outputHandler),