mirror of https://github.com/databricks/cli.git
Detect duplicate identifiers in bundle config (#332)
## Changes This PR adds checks during bundle config load and merge to error out if there are duplicate keys for resource definitions ## Tests Using unit tests and manually
This commit is contained in:
parent
eab29603fc
commit
93d57dd00f
|
@ -1,6 +1,8 @@
|
||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"github.com/databricks/bricks/bundle/config/resources"
|
"github.com/databricks/bricks/bundle/config/resources"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -13,6 +15,87 @@ type Resources struct {
|
||||||
Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"`
|
Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type UniqueResourceIdTracker struct {
|
||||||
|
Type map[string]string
|
||||||
|
ConfigPath map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
// verifies merging is safe by checking no duplicate identifiers exist
|
||||||
|
func (r *Resources) VerifySafeMerge(other *Resources) error {
|
||||||
|
rootTracker, err := r.VerifyUniqueResourceIdentifiers()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
otherTracker, err := other.VerifyUniqueResourceIdentifiers()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for k := range otherTracker.Type {
|
||||||
|
if _, ok := rootTracker.Type[k]; ok {
|
||||||
|
return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||||
|
k,
|
||||||
|
rootTracker.Type[k],
|
||||||
|
rootTracker.ConfigPath[k],
|
||||||
|
otherTracker.Type[k],
|
||||||
|
otherTracker.ConfigPath[k],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function verifies there are no duplicate names used for the resource definations
|
||||||
|
func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) {
|
||||||
|
tracker := &UniqueResourceIdTracker{
|
||||||
|
Type: make(map[string]string),
|
||||||
|
ConfigPath: make(map[string]string),
|
||||||
|
}
|
||||||
|
for k := range r.Jobs {
|
||||||
|
tracker.Type[k] = "job"
|
||||||
|
tracker.ConfigPath[k] = r.Jobs[k].ConfigFilePath
|
||||||
|
}
|
||||||
|
for k := range r.Pipelines {
|
||||||
|
if _, ok := tracker.Type[k]; ok {
|
||||||
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||||
|
k,
|
||||||
|
tracker.Type[k],
|
||||||
|
tracker.ConfigPath[k],
|
||||||
|
"pipeline",
|
||||||
|
r.Pipelines[k].ConfigFilePath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
tracker.Type[k] = "pipeline"
|
||||||
|
tracker.ConfigPath[k] = r.Pipelines[k].ConfigFilePath
|
||||||
|
}
|
||||||
|
for k := range r.Models {
|
||||||
|
if _, ok := tracker.Type[k]; ok {
|
||||||
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||||
|
k,
|
||||||
|
tracker.Type[k],
|
||||||
|
tracker.ConfigPath[k],
|
||||||
|
"mlflow_model",
|
||||||
|
r.Models[k].ConfigFilePath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
tracker.Type[k] = "mlflow_model"
|
||||||
|
tracker.ConfigPath[k] = r.Models[k].ConfigFilePath
|
||||||
|
}
|
||||||
|
for k := range r.Experiments {
|
||||||
|
if _, ok := tracker.Type[k]; ok {
|
||||||
|
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||||
|
k,
|
||||||
|
tracker.Type[k],
|
||||||
|
tracker.ConfigPath[k],
|
||||||
|
"mlflow_experiment",
|
||||||
|
r.Experiments[k].ConfigFilePath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
tracker.Type[k] = "mlflow_experiment"
|
||||||
|
tracker.ConfigPath[k] = r.Experiments[k].ConfigFilePath
|
||||||
|
}
|
||||||
|
return tracker, nil
|
||||||
|
}
|
||||||
|
|
||||||
// SetConfigFilePath sets the specified path for all resources contained in this instance.
|
// SetConfigFilePath sets the specified path for all resources contained in this instance.
|
||||||
// This property is used to correctly resolve paths relative to the path
|
// This property is used to correctly resolve paths relative to the path
|
||||||
// of the configuration file they were defined in.
|
// of the configuration file they were defined in.
|
||||||
|
|
|
@ -0,0 +1,96 @@
|
||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/bricks/bundle/config/resources"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestVerifyUniqueResourceIdentifiers(t *testing.T) {
|
||||||
|
r := Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Models: map[string]*resources.MlflowModel{
|
||||||
|
"bar": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "bar.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Experiments: map[string]*resources.MlflowExperiment{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo2.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := r.VerifyUniqueResourceIdentifiers()
|
||||||
|
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, mlflow_experiment at foo2.yml)")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifySafeMerge(t *testing.T) {
|
||||||
|
r := Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Models: map[string]*resources.MlflowModel{
|
||||||
|
"bar": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "bar.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
other := Resources{
|
||||||
|
Pipelines: map[string]*resources.Pipeline{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo2.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := r.VerifySafeMerge(&other)
|
||||||
|
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, pipeline at foo2.yml)")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVerifySafeMergeForSameResourceType(t *testing.T) {
|
||||||
|
r := Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Models: map[string]*resources.MlflowModel{
|
||||||
|
"bar": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "bar.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
other := Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"foo": {
|
||||||
|
Paths: resources.Paths{
|
||||||
|
ConfigFilePath: "foo2.yml",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := r.VerifySafeMerge(&other)
|
||||||
|
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, job at foo2.yml)")
|
||||||
|
}
|
|
@ -88,15 +88,24 @@ func (r *Root) Load(path string) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
r.Path = filepath.Dir(path)
|
r.Path = filepath.Dir(path)
|
||||||
r.SetConfigFilePath(path)
|
r.SetConfigFilePath(path)
|
||||||
return nil
|
|
||||||
|
_, err = r.Resources.VerifyUniqueResourceIdentifiers()
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *Root) Merge(other *Root) error {
|
func (r *Root) Merge(other *Root) error {
|
||||||
// TODO: when hooking into merge semantics, disallow setting path on the target instance.
|
// TODO: when hooking into merge semantics, disallow setting path on the target instance.
|
||||||
other.Path = ""
|
other.Path = ""
|
||||||
|
|
||||||
|
// Check for safe merge, protecting against duplicate resource identifiers
|
||||||
|
err := r.Resources.VerifySafeMerge(&other.Resources)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: define and test semantics for merging.
|
// TODO: define and test semantics for merging.
|
||||||
return mergo.MergeWithOverwrite(r, other)
|
return mergo.MergeWithOverwrite(r, other)
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,3 +74,22 @@ func TestRootMergeMap(t *testing.T) {
|
||||||
assert.NoError(t, root.Merge(other))
|
assert.NoError(t, root.Merge(other))
|
||||||
assert.Equal(t, &Workspace{Host: "bar", Profile: "profile"}, root.Environments["development"].Workspace)
|
assert.Equal(t, &Workspace{Host: "bar", Profile: "profile"}, root.Environments["development"].Workspace)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
|
||||||
|
root := &Root{}
|
||||||
|
err := root.Load("./testdata/duplicate_resource_names_in_root/bundle.yml")
|
||||||
|
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
|
||||||
|
root := &Root{}
|
||||||
|
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
other := &Root{}
|
||||||
|
err = other.Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = root.Merge(other)
|
||||||
|
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
|
||||||
|
}
|
||||||
|
|
10
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/bundle.yml
vendored
Normal file
10
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/bundle.yml
vendored
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
bundle:
|
||||||
|
name: test
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
profile: test
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
foo:
|
||||||
|
name: job foo
|
4
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml
vendored
Normal file
4
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
resources:
|
||||||
|
pipelines:
|
||||||
|
foo:
|
||||||
|
name: pipeline foo
|
|
@ -0,0 +1,13 @@
|
||||||
|
bundle:
|
||||||
|
name: test
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
profile: test
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
foo:
|
||||||
|
name: job foo
|
||||||
|
pipelines:
|
||||||
|
foo:
|
||||||
|
name: pipeline foo
|
|
@ -0,0 +1,13 @@
|
||||||
|
bundle:
|
||||||
|
name: test
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
profile: test
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
foo:
|
||||||
|
name: job foo
|
||||||
|
pipelines:
|
||||||
|
foo:
|
||||||
|
name: pipeline foo
|
|
@ -0,0 +1,10 @@
|
||||||
|
bundle:
|
||||||
|
name: test
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
profile: test
|
||||||
|
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
foo:
|
||||||
|
name: job foo
|
|
@ -0,0 +1,4 @@
|
||||||
|
resources:
|
||||||
|
pipelines:
|
||||||
|
foo:
|
||||||
|
name: pipeline foo
|
|
@ -0,0 +1,5 @@
|
||||||
|
bundle:
|
||||||
|
name: test
|
||||||
|
|
||||||
|
workspace:
|
||||||
|
profile: test
|
|
@ -0,0 +1,4 @@
|
||||||
|
resources:
|
||||||
|
jobs:
|
||||||
|
foo:
|
||||||
|
name: job foo
|
|
@ -0,0 +1,4 @@
|
||||||
|
resources:
|
||||||
|
pipelines:
|
||||||
|
foo:
|
||||||
|
name: pipeline foo
|
|
@ -0,0 +1,37 @@
|
||||||
|
package config_tests
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/bricks/bundle"
|
||||||
|
"github.com/databricks/bricks/bundle/config/mutator"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
|
||||||
|
_, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
|
||||||
|
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml")
|
||||||
|
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
|
||||||
|
b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
|
||||||
|
require.NoError(t, err)
|
||||||
|
err = bundle.Apply(context.Background(), b, mutator.DefaultMutators())
|
||||||
|
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml")
|
||||||
|
resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
|
||||||
|
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConflictingResourceIdsTwoSubconfigs(t *testing.T) {
|
||||||
|
b, err := bundle.Load("./conflicting_resource_ids/two_subconfigurations")
|
||||||
|
require.NoError(t, err)
|
||||||
|
err = bundle.Apply(context.Background(), b, mutator.DefaultMutators())
|
||||||
|
resources1ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources1.yml")
|
||||||
|
resources2ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources2.yml")
|
||||||
|
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", resources1ConfigPath, resources2ConfigPath))
|
||||||
|
}
|
Loading…
Reference in New Issue