Revert changes related to basename check for local libraries (#2345)

## Changes

These changes break the use of non-local libraries (such as PyPI
libraries).

This reverts the set so we can cut a patch release and take a closer
look later.

Original PRs are #2297 and #2341.

Issue reported in #2343.

## Tests

Manually confirmed that a bundle with a PyPI package in its libraries now
deploys fine.
This commit is contained in:
Pieter Noordhuis 2025-02-12 20:05:49 +01:00 committed by GitHub
parent 9c90984688
commit 96302c7415
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 1 addition and 373 deletions

View File

@ -1,52 +0,0 @@
# Acceptance-test bundle: two wheel artifacts whose builds produce wheels with
# the same basename but different paths, used to exercise the duplicate
# local-library-name check.
bundle:
  name: same_name_libraries

variables:
  # Shared single-node cluster spec referenced by every task below.
  cluster:
    default:
      spark_version: 15.4.x-scala2.12
      node_type_id: i3.xlarge
      data_security_mode: SINGLE_USER
      num_workers: 0
      spark_conf:
        spark.master: "local[*, 4]"
        spark.databricks.cluster.profile: singleNode
      custom_tags:
        ResourceClass: SingleNode

# Two separate wheel builds; per the expected output both yield a wheel named
# my_default_python-0.0.1-py3-none-any.whl (same basename, different paths).
artifacts:
  whl1:
    type: whl
    path: ./whl1
  whl2:
    type: whl
    path: ./whl2

resources:
  jobs:
    test:
      name: "test"
      tasks:
        # task1 and task2 reference wheels with the same basename from
        # different artifact directories — this is the expected conflict.
        - task_key: task1
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl1/dist/*.whl
            # Non-local (PyPI) library; must be ignored by the check.
            - pypi:
                package: test_package
        - task_key: task2
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl2/dist/*.whl
        # task3 reuses the exact same wheel path as task1, which is allowed
        # (same basename AND same full path is not a duplicate).
        - task_key: task3
          new_cluster: ${var.cluster}
          python_wheel_task:
            entry_point: main
            package_name: my_default_python
          libraries:
            - whl: ./whl1/dist/*.whl

View File

@ -1,14 +0,0 @@
>>> errcode [CLI] bundle deploy
Building whl1...
Building whl2...
Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl
at resources.jobs.test.tasks[0].libraries[0].whl
resources.jobs.test.tasks[1].libraries[0].whl
in databricks.yml:36:15
databricks.yml:45:15
Local library names must be unique
Exit code: 1

View File

@ -1,2 +0,0 @@
# Deploy the bundle; errcode records the command's exit status in the output
# (the transcript expects this deploy to fail with exit code 1).
trace errcode $CLI bundle deploy

# Remove the generated wheel source directories so they don't pollute
# subsequent runs or the recorded output.
rm -rf whl1 whl2

View File

@ -1,36 +0,0 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import my_default_python
setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)

View File

@ -1,36 +0,0 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import my_default_python
setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)

View File

@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic
for _, match := range matches {
output = append(output, dyn.NewValue(map[string]dyn.Value{
libType: dyn.NewValue(match, lib.Locations()),
libType: dyn.V(match),
}, lib.Locations()))
}
}

View File

@ -1,102 +0,0 @@
package libraries
import (
"context"
"path/filepath"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)
// checkForSameNameLibraries is a bundle mutator that reports an error when
// two local libraries with different full paths share the same file basename.
type checkForSameNameLibraries struct{}

// patterns enumerates the configuration locations where library references
// appear: task libraries, for-each-task libraries, and environment
// dependencies.
var patterns = []dyn.Pattern{
	taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
	forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
	envDepsPattern.Append(dyn.AnyIndex()),
}

// libData aggregates every occurrence of a single library basename.
type libData struct {
	fullPath  string         // full path of the first occurrence of the basename
	locations []dyn.Location // source locations of all conflicting occurrences
	paths     []dyn.Path     // config paths of all conflicting occurrences
}
// Apply walks all library references matched by [patterns], groups local
// libraries by file basename, and emits an error diagnostic for every
// basename that is referenced via more than one distinct full path.
// Non-string entries (PyPI/Maven libraries, which are maps rather than
// strings) and non-local libraries are skipped. The configuration is not
// modified; Mutate is used only to traverse it.
func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
	var diags diag.Diagnostics
	libs := make(map[string]*libData)

	err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
		var err error
		for _, pattern := range patterns {
			v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) {
				libFullPath, ok := lv.AsString()
				// If the value is not a string, skip the check because it's not whl or jar type which defines the library
				// as a string versus PyPi or Maven which defines the library as a map.
				//
				// Return the leaf value unchanged. (Previously this returned
				// the root value `v`, which overwrote the library entry with
				// the entire config tree and broke non-local libraries; see
				// issue #2343.)
				if !ok {
					return lv, nil
				}

				// If not local library, skip the check
				if !IsLibraryLocal(libFullPath) {
					return lv, nil
				}

				lib := filepath.Base(libFullPath)
				// If the same basename was seen already but full path is different
				// then it's a duplicate. Add the location to the location list.
				lp, ok := libs[lib]
				if !ok {
					libs[lib] = &libData{
						fullPath:  libFullPath,
						locations: []dyn.Location{lv.Location()},
						paths:     []dyn.Path{p},
					}
				} else if lp.fullPath != libFullPath {
					lp.locations = append(lp.locations, lv.Location())
					lp.paths = append(lp.paths, p)
				}

				return lv, nil
			})
			if err != nil {
				return dyn.InvalidValue, err
			}
		}
		// No redundant err check here: every iteration above already returns
		// on a non-nil err, so err is always nil at this point.
		return v, nil
	})

	// Iterate over all the libraries and check if there are any duplicates.
	// Duplicates will have more than one location.
	// If there are duplicates, add a diagnostic.
	for lib, lv := range libs {
		if len(lv.locations) > 1 {
			diags = append(diags, diag.Diagnostic{
				Severity:  diag.Error,
				Summary:   "Duplicate local library name " + lib,
				Detail:    "Local library names must be unique",
				Locations: lv.locations,
				Paths:     lv.paths,
			})
		}
	}
	// Surface a traversal error alongside any duplicates found before it.
	if err != nil {
		diags = diags.Extend(diag.FromErr(err))
	}

	return diags
}
// Name returns the mutator's name for use in logs and diagnostics.
func (c checkForSameNameLibraries) Name() string {
	return "CheckForSameNameLibraries"
}
// CheckForSameNameLibraries returns a mutator that fails when two local
// libraries with different full paths share the same basename. It should run
// after glob references are expanded so the actual library paths are known.
func CheckForSameNameLibraries() bundle.Mutator {
	return checkForSameNameLibraries{}
}

View File

@ -1,121 +0,0 @@
package libraries
import (
"context"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/internal/bundletest"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)
func TestSameNameLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test.whl",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test.whl",
},
},
},
},
},
},
},
},
},
}
bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
})
bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{
{File: "databricks.yml", Line: 20, Column: 1},
})
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Len(t, diags, 1)
require.Equal(t, diag.Error, diags[0].Severity)
require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary)
require.Equal(t, []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
{File: "databricks.yml", Line: 20, Column: 1},
}, diags[0].Locations)
paths := make([]string, 0)
for _, p := range diags[0].Paths {
paths = append(paths, p.String())
}
require.Equal(t, []string{
"resources.jobs.test.tasks[0].libraries[0].whl",
"resources.jobs.test.tasks[1].libraries[0].whl",
}, paths)
}
func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl",
},
{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test-0.1.0.whl",
},
{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task
},
},
},
},
},
},
},
},
},
}
diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Empty(t, diags)
}

View File

@ -155,11 +155,6 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator {
mutator.ValidateGitDetails(),
artifacts.CleanUp(),
libraries.ExpandGlobReferences(),
// libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we
// know what are the actual library paths.
// libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this
// mutator is part of the deploy step rather than validate.
libraries.CheckForSameNameLibraries(),
libraries.Upload(),
trampoline.TransformWheelTask(),
files.Upload(outputHandler),