mirror of https://github.com/databricks/cli.git
Raise an error when there are multiple local libraries with the same basename used (#2297)
## Changes Raise an error when there are multiple local libraries with the same basename used Fixes #1674 ## Tests Added an unit test
This commit is contained in:
parent
6d83ffd109
commit
2a97dcaa45
|
@ -0,0 +1,50 @@
|
|||
bundle:
|
||||
name: same_name_libraries
|
||||
|
||||
variables:
|
||||
cluster:
|
||||
default:
|
||||
spark_version: 15.4.x-scala2.12
|
||||
node_type_id: i3.xlarge
|
||||
data_security_mode: SINGLE_USER
|
||||
num_workers: 0
|
||||
spark_conf:
|
||||
spark.master: "local[*, 4]"
|
||||
spark.databricks.cluster.profile: singleNode
|
||||
custom_tags:
|
||||
ResourceClass: SingleNode
|
||||
|
||||
artifacts:
|
||||
whl1:
|
||||
type: whl
|
||||
path: ./whl1
|
||||
whl2:
|
||||
type: whl
|
||||
path: ./whl2
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
test:
|
||||
name: "test"
|
||||
tasks:
|
||||
- task_key: task1
|
||||
new_cluster: ${var.cluster}
|
||||
python_wheel_task:
|
||||
entry_point: main
|
||||
package_name: my_default_python
|
||||
libraries:
|
||||
- whl: ./whl1/dist/*.whl
|
||||
- task_key: task2
|
||||
new_cluster: ${var.cluster}
|
||||
python_wheel_task:
|
||||
entry_point: main
|
||||
package_name: my_default_python
|
||||
libraries:
|
||||
- whl: ./whl2/dist/*.whl
|
||||
- task_key: task3
|
||||
new_cluster: ${var.cluster}
|
||||
python_wheel_task:
|
||||
entry_point: main
|
||||
package_name: my_default_python
|
||||
libraries:
|
||||
- whl: ./whl1/dist/*.whl
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
>>> errcode [CLI] bundle deploy
|
||||
Building whl1...
|
||||
Building whl2...
|
||||
Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl
|
||||
at resources.jobs.test.tasks[0].libraries[0].whl
|
||||
resources.jobs.test.tasks[1].libraries[0].whl
|
||||
in databricks.yml:36:15
|
||||
databricks.yml:43:15
|
||||
|
||||
Local library names must be unique
|
||||
|
||||
|
||||
Exit code: 1
|
|
@ -0,0 +1,2 @@
|
|||
trace errcode $CLI bundle deploy
|
||||
rm -rf whl1 whl2
|
|
@ -0,0 +1,36 @@
|
|||
"""
|
||||
setup.py configuration script describing how to build and package this project.
|
||||
|
||||
This file is primarily used by the setuptools library and typically should not
|
||||
be executed directly. See README.md for how to deploy, test, and run
|
||||
the my_default_python project.
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
import sys
|
||||
|
||||
sys.path.append("./src")
|
||||
|
||||
import my_default_python
|
||||
|
||||
setup(
|
||||
name="my_default_python",
|
||||
version=my_default_python.__version__,
|
||||
url="https://databricks.com",
|
||||
author="[USERNAME]",
|
||||
description="wheel file based on my_default_python/src",
|
||||
packages=find_packages(where="./src"),
|
||||
package_dir={"": "src"},
|
||||
entry_points={
|
||||
"packages": [
|
||||
"main=my_default_python.main:main",
|
||||
],
|
||||
},
|
||||
install_requires=[
|
||||
# Dependencies in case the output wheel file is used as a library dependency.
|
||||
# For defining dependencies, when this package is used in Databricks, see:
|
||||
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||
"setuptools"
|
||||
],
|
||||
)
|
|
@ -0,0 +1 @@
|
|||
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
|||
print("hello")
|
|
@ -0,0 +1,36 @@
|
|||
"""
|
||||
setup.py configuration script describing how to build and package this project.
|
||||
|
||||
This file is primarily used by the setuptools library and typically should not
|
||||
be executed directly. See README.md for how to deploy, test, and run
|
||||
the my_default_python project.
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
import sys
|
||||
|
||||
sys.path.append("./src")
|
||||
|
||||
import my_default_python
|
||||
|
||||
setup(
|
||||
name="my_default_python",
|
||||
version=my_default_python.__version__,
|
||||
url="https://databricks.com",
|
||||
author="[USERNAME]",
|
||||
description="wheel file based on my_default_python/src",
|
||||
packages=find_packages(where="./src"),
|
||||
package_dir={"": "src"},
|
||||
entry_points={
|
||||
"packages": [
|
||||
"main=my_default_python.main:main",
|
||||
],
|
||||
},
|
||||
install_requires=[
|
||||
# Dependencies in case the output wheel file is used as a library dependency.
|
||||
# For defining dependencies, when this package is used in Databricks, see:
|
||||
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
|
||||
"setuptools"
|
||||
],
|
||||
)
|
|
@ -0,0 +1 @@
|
|||
__version__ = "0.0.1"
|
|
@ -0,0 +1 @@
|
|||
print("hello")
|
|
@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic
|
|||
|
||||
for _, match := range matches {
|
||||
output = append(output, dyn.NewValue(map[string]dyn.Value{
|
||||
libType: dyn.V(match),
|
||||
libType: dyn.NewValue(match, lib.Locations()),
|
||||
}, lib.Locations()))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
package libraries
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/libs/diag"
|
||||
"github.com/databricks/cli/libs/dyn"
|
||||
)
|
||||
|
||||
type checkForSameNameLibraries struct{}
|
||||
|
||||
var patterns = []dyn.Pattern{
|
||||
taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
|
||||
forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
|
||||
envDepsPattern.Append(dyn.AnyIndex()),
|
||||
}
|
||||
|
||||
type libData struct {
|
||||
fullPath string
|
||||
locations []dyn.Location
|
||||
paths []dyn.Path
|
||||
}
|
||||
|
||||
func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
|
||||
var diags diag.Diagnostics
|
||||
libs := make(map[string]*libData)
|
||||
|
||||
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
|
||||
var err error
|
||||
for _, pattern := range patterns {
|
||||
v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) {
|
||||
libPath := lv.MustString()
|
||||
// If not local library, skip the check
|
||||
if !IsLibraryLocal(libPath) {
|
||||
return lv, nil
|
||||
}
|
||||
|
||||
libFullPath := lv.MustString()
|
||||
lib := filepath.Base(libFullPath)
|
||||
// If the same basename was seen already but full path is different
|
||||
// then it's a duplicate. Add the location to the location list.
|
||||
lp, ok := libs[lib]
|
||||
if !ok {
|
||||
libs[lib] = &libData{
|
||||
fullPath: libFullPath,
|
||||
locations: []dyn.Location{lv.Location()},
|
||||
paths: []dyn.Path{p},
|
||||
}
|
||||
} else if lp.fullPath != libFullPath {
|
||||
lp.locations = append(lp.locations, lv.Location())
|
||||
lp.paths = append(lp.paths, p)
|
||||
}
|
||||
|
||||
return lv, nil
|
||||
})
|
||||
if err != nil {
|
||||
return dyn.InvalidValue, err
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return dyn.InvalidValue, err
|
||||
}
|
||||
|
||||
return v, nil
|
||||
})
|
||||
|
||||
// Iterate over all the libraries and check if there are any duplicates.
|
||||
// Duplicates will have more than one location.
|
||||
// If there are duplicates, add a diagnostic.
|
||||
for lib, lv := range libs {
|
||||
if len(lv.locations) > 1 {
|
||||
diags = append(diags, diag.Diagnostic{
|
||||
Severity: diag.Error,
|
||||
Summary: "Duplicate local library name " + lib,
|
||||
Detail: "Local library names must be unique",
|
||||
Locations: lv.locations,
|
||||
Paths: lv.paths,
|
||||
})
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
diags = diags.Extend(diag.FromErr(err))
|
||||
}
|
||||
|
||||
return diags
|
||||
}
|
||||
|
||||
func (c checkForSameNameLibraries) Name() string {
|
||||
return "CheckForSameNameLibraries"
|
||||
}
|
||||
|
||||
func CheckForSameNameLibraries() bundle.Mutator {
|
||||
return checkForSameNameLibraries{}
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
package libraries
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/bundle/config/resources"
|
||||
"github.com/databricks/cli/bundle/internal/bundletest"
|
||||
"github.com/databricks/cli/libs/diag"
|
||||
"github.com/databricks/cli/libs/dyn"
|
||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSameNameLibraries(t *testing.T) {
|
||||
b := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"test": {
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.Task{
|
||||
{
|
||||
Libraries: []compute.Library{
|
||||
{
|
||||
Whl: "full/path/test.whl",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Libraries: []compute.Library{
|
||||
{
|
||||
Whl: "other/path/test.whl",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{
|
||||
{File: "databricks.yml", Line: 10, Column: 1},
|
||||
})
|
||||
bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{
|
||||
{File: "databricks.yml", Line: 20, Column: 1},
|
||||
})
|
||||
|
||||
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
|
||||
require.Len(t, diags, 1)
|
||||
require.Equal(t, diag.Error, diags[0].Severity)
|
||||
require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary)
|
||||
require.Equal(t, []dyn.Location{
|
||||
{File: "databricks.yml", Line: 10, Column: 1},
|
||||
{File: "databricks.yml", Line: 20, Column: 1},
|
||||
}, diags[0].Locations)
|
||||
|
||||
paths := make([]string, 0)
|
||||
for _, p := range diags[0].Paths {
|
||||
paths = append(paths, p.String())
|
||||
}
|
||||
require.Equal(t, []string{
|
||||
"resources.jobs.test.tasks[0].libraries[0].whl",
|
||||
"resources.jobs.test.tasks[1].libraries[0].whl",
|
||||
}, paths)
|
||||
}
|
||||
|
||||
func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) {
|
||||
b := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"test": {
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.Task{
|
||||
{
|
||||
Libraries: []compute.Library{
|
||||
{
|
||||
Whl: "full/path/test-0.1.1.whl",
|
||||
},
|
||||
|
||||
{
|
||||
Whl: "cowsay",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Libraries: []compute.Library{
|
||||
{
|
||||
Whl: "other/path/test-0.1.0.whl",
|
||||
},
|
||||
|
||||
{
|
||||
Whl: "cowsay",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Libraries: []compute.Library{
|
||||
{
|
||||
Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
|
||||
require.Empty(t, diags)
|
||||
}
|
|
@ -155,6 +155,11 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator {
|
|||
mutator.ValidateGitDetails(),
|
||||
artifacts.CleanUp(),
|
||||
libraries.ExpandGlobReferences(),
|
||||
// libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we
|
||||
// know what are the actual library paths.
|
||||
// libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this
|
||||
// mutator is part of the deploy step rather than validate.
|
||||
libraries.CheckForSameNameLibraries(),
|
||||
libraries.Upload(),
|
||||
trampoline.TransformWheelTask(),
|
||||
files.Upload(outputHandler),
|
||||
|
|
Loading…
Reference in New Issue