diff --git a/acceptance/bundle/artifacts/same_name_libraries/databricks.yml b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml new file mode 100644 index 000000000..d58674a64 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/databricks.yml @@ -0,0 +1,54 @@ +bundle: + name: same_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl + - pypi: + package: test_package + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl2/dist/*.whl + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/same_name_libraries/output.txt b/acceptance/bundle/artifacts/same_name_libraries/output.txt new file mode 100644 index 000000000..ee6c9d566 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/output.txt @@ -0,0 +1,14 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Error: Duplicate local library names: my_default_python-0.0.1-py3-none-any.whl + at resources.jobs.test.tasks[0].libraries[0].whl + resources.jobs.test.tasks[1].libraries[0].whl + in databricks.yml:36:15 + databricks.yml:45:15 + +Local library names must be unique but found libraries with the same name: whl1/dist/my_default_python-0.0.1-py3-none-any.whl, whl2/dist/my_default_python-0.0.1-py3-none-any.whl + + +Exit code: 1 diff --git a/acceptance/bundle/artifacts/same_name_libraries/script b/acceptance/bundle/artifacts/same_name_libraries/script new file mode 100644 index 000000000..6c899df07 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/same_name_libraries/test.toml b/acceptance/bundle/artifacts/same_name_libraries/test.toml new file mode 100644 index 000000000..a298217f2 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/test.toml @@ -0,0 +1,3 @@ +[[Repls]] +Old = '\\' +New = '/' diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py new file mode 100644 index 000000000..ddd81295e --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl1/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py new file mode 100644 index 000000000..ddd81295e --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_default_python + +setup( + name="my_default_python", + version=my_default_python.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_default_python/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_default_python.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/same_name_libraries/whl2/src/my_default_python/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml new file mode 100644 index 000000000..dd13c1918 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/databricks.yml @@ -0,0 +1,56 @@ +bundle: + name: unique_name_libraries + +variables: + cluster: + default: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode + +artifacts: + whl1: + type: whl + path: ./whl1 + whl2: + type: whl + path: ./whl2 + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: task1 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_package + libraries: + - whl: ./whl1/dist/*.whl + - whl: cowsay + - pypi: + package: test_package + - task_key: task2 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_other_package + libraries: + - whl: ./whl2/dist/*.whl + - whl: cowsay + - maven: + coordinates: org.apache.spark:spark-sql_2.12:3.1.1 + - task_key: task3 + new_cluster: ${var.cluster} + python_wheel_task: + entry_point: main + package_name: my_default_python + libraries: + - whl: ./whl1/dist/*.whl diff --git a/acceptance/bundle/artifacts/unique_name_libraries/output.txt b/acceptance/bundle/artifacts/unique_name_libraries/output.txt new file mode 100644 index 000000000..ecc7bf57b --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/output.txt @@ -0,0 +1,10 @@ + +>>> errcode [CLI] bundle deploy +Building whl1... +Building whl2... +Uploading [package name] +Uploading [package name] +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/unique_name_libraries/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/artifacts/unique_name_libraries/script b/acceptance/bundle/artifacts/unique_name_libraries/script new file mode 100644 index 000000000..6c899df07 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/script @@ -0,0 +1,2 @@ +trace errcode $CLI bundle deploy +rm -rf whl1 whl2 diff --git a/acceptance/bundle/artifacts/unique_name_libraries/test.toml b/acceptance/bundle/artifacts/unique_name_libraries/test.toml new file mode 100644 index 000000000..280338bd6 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/test.toml @@ -0,0 +1,6 @@ +Cloud = false + +# The order in which files are uploaded can be different, so we just replace the name +[[Repls]] +Old="Uploading .*-0.0.1-py3-none-any.whl..." +New="Uploading [package name]" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py new file mode 100644 index 000000000..ca85def32 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_package + +setup( + name="my_package", + version=my_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl1/src/my_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py new file mode 100644 index 000000000..5e5b34788 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +import sys + +sys.path.append("./src") + +import my_other_package + +setup( + name="my_other_package", + version=my_other_package.__version__, + url="https://databricks.com", + author="[USERNAME]", + description="wheel file based on my_other_package/src", + packages=find_packages(where="./src"), + package_dir={"": "src"}, + entry_points={ + "packages": [ + "main=my_other_package.main:main", + ], + }, + install_requires=[ + # Dependencies in case the output wheel file is used as a library dependency. + # For defining dependencies, when this package is used in Databricks, see: + # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html + "setuptools" + ], +) diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py new file mode 100644 index 000000000..11b15b1a4 --- /dev/null +++ b/acceptance/bundle/artifacts/unique_name_libraries/whl2/src/my_other_package/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index bb1905045..7a808f627 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic for _, match := range matches { output = append(output, dyn.NewValue(map[string]dyn.Value{ - libType: dyn.V(match), + libType: dyn.NewValue(match, lib.Locations()), }, lib.Locations())) } } diff --git a/bundle/libraries/same_name_libraries.go b/bundle/libraries/same_name_libraries.go new file mode 100644 index 000000000..ab869d3d2 --- /dev/null +++ b/bundle/libraries/same_name_libraries.go @@ -0,0 +1,104 @@ +package libraries + +import ( + "context" + "path/filepath" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type checkForSameNameLibraries struct{} + +var patterns = []dyn.Pattern{ + taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()), + envDepsPattern.Append(dyn.AnyIndex()), +} + +type libData struct { + fullPath string + locations []dyn.Location + paths []dyn.Path + otherPaths []string +} + +func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + libs := make(map[string]*libData) + + err := b.Config.Mutate(func(rootConfig dyn.Value) (dyn.Value, error) { + var err error + for _, pattern := range patterns { + rootConfig, err = dyn.MapByPattern(rootConfig, pattern, func(p dyn.Path, libraryValue dyn.Value) (dyn.Value, error) { + libPath, ok := libraryValue.AsString() + if !ok { + return libraryValue, nil + } + + // If not local library, skip the check + if !IsLibraryLocal(libPath) { + return libraryValue, nil + } + + lib := filepath.Base(libPath) + // If the same basename was seen already but full path is different + // then it's a duplicate. Add the location to the location list. + lp, ok := libs[lib] + if !ok { + libs[lib] = &libData{ + fullPath: libPath, + locations: []dyn.Location{libraryValue.Location()}, + paths: []dyn.Path{p}, + otherPaths: []string{}, + } + } else if lp.fullPath != libPath { + lp.locations = append(lp.locations, libraryValue.Location()) + lp.paths = append(lp.paths, p) + lp.otherPaths = append(lp.otherPaths, libPath) + } + + return libraryValue, nil + }) + if err != nil { + return dyn.InvalidValue, err + } + } + + if err != nil { + return dyn.InvalidValue, err + } + + return rootConfig, nil + }) + + // Iterate over all the libraries and check if there are any duplicates. + // Duplicates will have more than one location. + // If there are duplicates, add a diagnostic. + for lib, lv := range libs { + if len(lv.locations) > 1 { + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: "Duplicate local library names: " + lib, + Detail: "Local library names must be unique but found libraries with the same name: " + lv.fullPath + ", " + strings.Join(lv.otherPaths, ", "), + Locations: lv.locations, + Paths: lv.paths, + }) + } + } + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + + return diags +} + +func (c checkForSameNameLibraries) Name() string { + return "CheckForSameNameLibraries" +} + +func CheckForSameNameLibraries() bundle.Mutator { + return checkForSameNameLibraries{} +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index b31ed682f..02e0e9bd8 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -177,7 +177,12 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand deploy.StatePull(), mutator.ValidateGitDetails(), artifacts.CleanUp(), + // libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we + // know what are the actual library paths. + // libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this + // mutator is part of the deploy step rather than validate. libraries.ExpandGlobReferences(), + libraries.CheckForSameNameLibraries(), libraries.Upload(), trampoline.TransformWheelTask(), files.Upload(outputHandler),