mirror of https://github.com/databricks/cli.git
Detect duplicate identifiers in bundle config (#332)
## Changes This PR adds checks during bundle config load and merge to error out if there are duplicate keys for resource definitions ## Tests Using unit tests and manually
This commit is contained in:
parent
eab29603fc
commit
93d57dd00f
|
@ -1,6 +1,8 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/databricks/bricks/bundle/config/resources"
|
||||
)
|
||||
|
||||
|
@ -13,6 +15,87 @@ type Resources struct {
|
|||
Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"`
|
||||
}
|
||||
|
||||
type UniqueResourceIdTracker struct {
|
||||
Type map[string]string
|
||||
ConfigPath map[string]string
|
||||
}
|
||||
|
||||
// verifies merging is safe by checking no duplicate identifiers exist
|
||||
func (r *Resources) VerifySafeMerge(other *Resources) error {
|
||||
rootTracker, err := r.VerifyUniqueResourceIdentifiers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
otherTracker, err := other.VerifyUniqueResourceIdentifiers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k := range otherTracker.Type {
|
||||
if _, ok := rootTracker.Type[k]; ok {
|
||||
return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||
k,
|
||||
rootTracker.Type[k],
|
||||
rootTracker.ConfigPath[k],
|
||||
otherTracker.Type[k],
|
||||
otherTracker.ConfigPath[k],
|
||||
)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// This function verifies there are no duplicate names used for the resource definations
|
||||
func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) {
|
||||
tracker := &UniqueResourceIdTracker{
|
||||
Type: make(map[string]string),
|
||||
ConfigPath: make(map[string]string),
|
||||
}
|
||||
for k := range r.Jobs {
|
||||
tracker.Type[k] = "job"
|
||||
tracker.ConfigPath[k] = r.Jobs[k].ConfigFilePath
|
||||
}
|
||||
for k := range r.Pipelines {
|
||||
if _, ok := tracker.Type[k]; ok {
|
||||
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||
k,
|
||||
tracker.Type[k],
|
||||
tracker.ConfigPath[k],
|
||||
"pipeline",
|
||||
r.Pipelines[k].ConfigFilePath,
|
||||
)
|
||||
}
|
||||
tracker.Type[k] = "pipeline"
|
||||
tracker.ConfigPath[k] = r.Pipelines[k].ConfigFilePath
|
||||
}
|
||||
for k := range r.Models {
|
||||
if _, ok := tracker.Type[k]; ok {
|
||||
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||
k,
|
||||
tracker.Type[k],
|
||||
tracker.ConfigPath[k],
|
||||
"mlflow_model",
|
||||
r.Models[k].ConfigFilePath,
|
||||
)
|
||||
}
|
||||
tracker.Type[k] = "mlflow_model"
|
||||
tracker.ConfigPath[k] = r.Models[k].ConfigFilePath
|
||||
}
|
||||
for k := range r.Experiments {
|
||||
if _, ok := tracker.Type[k]; ok {
|
||||
return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)",
|
||||
k,
|
||||
tracker.Type[k],
|
||||
tracker.ConfigPath[k],
|
||||
"mlflow_experiment",
|
||||
r.Experiments[k].ConfigFilePath,
|
||||
)
|
||||
}
|
||||
tracker.Type[k] = "mlflow_experiment"
|
||||
tracker.ConfigPath[k] = r.Experiments[k].ConfigFilePath
|
||||
}
|
||||
return tracker, nil
|
||||
}
|
||||
|
||||
// SetConfigFilePath sets the specified path for all resources contained in this instance.
|
||||
// This property is used to correctly resolve paths relative to the path
|
||||
// of the configuration file they were defined in.
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/bricks/bundle/config/resources"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestVerifyUniqueResourceIdentifiers(t *testing.T) {
|
||||
r := Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
Models: map[string]*resources.MlflowModel{
|
||||
"bar": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "bar.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
Experiments: map[string]*resources.MlflowExperiment{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo2.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err := r.VerifyUniqueResourceIdentifiers()
|
||||
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, mlflow_experiment at foo2.yml)")
|
||||
}
|
||||
|
||||
func TestVerifySafeMerge(t *testing.T) {
|
||||
r := Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
Models: map[string]*resources.MlflowModel{
|
||||
"bar": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "bar.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
other := Resources{
|
||||
Pipelines: map[string]*resources.Pipeline{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo2.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
err := r.VerifySafeMerge(&other)
|
||||
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, pipeline at foo2.yml)")
|
||||
}
|
||||
|
||||
func TestVerifySafeMergeForSameResourceType(t *testing.T) {
|
||||
r := Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
Models: map[string]*resources.MlflowModel{
|
||||
"bar": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "bar.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
other := Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"foo": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: "foo2.yml",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
err := r.VerifySafeMerge(&other)
|
||||
assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, job at foo2.yml)")
|
||||
}
|
|
@ -88,15 +88,24 @@ func (r *Root) Load(path string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
r.Path = filepath.Dir(path)
|
||||
r.SetConfigFilePath(path)
|
||||
return nil
|
||||
|
||||
_, err = r.Resources.VerifyUniqueResourceIdentifiers()
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *Root) Merge(other *Root) error {
|
||||
// TODO: when hooking into merge semantics, disallow setting path on the target instance.
|
||||
other.Path = ""
|
||||
|
||||
// Check for safe merge, protecting against duplicate resource identifiers
|
||||
err := r.Resources.VerifySafeMerge(&other.Resources)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: define and test semantics for merging.
|
||||
return mergo.MergeWithOverwrite(r, other)
|
||||
}
|
||||
|
|
|
@ -74,3 +74,22 @@ func TestRootMergeMap(t *testing.T) {
|
|||
assert.NoError(t, root.Merge(other))
|
||||
assert.Equal(t, &Workspace{Host: "bar", Profile: "profile"}, root.Environments["development"].Workspace)
|
||||
}
|
||||
|
||||
func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
|
||||
root := &Root{}
|
||||
err := root.Load("./testdata/duplicate_resource_names_in_root/bundle.yml")
|
||||
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)")
|
||||
}
|
||||
|
||||
func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
|
||||
root := &Root{}
|
||||
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml")
|
||||
require.NoError(t, err)
|
||||
|
||||
other := &Root{}
|
||||
err = other.Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml")
|
||||
require.NoError(t, err)
|
||||
|
||||
err = root.Merge(other)
|
||||
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
|
||||
}
|
||||
|
|
10
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/bundle.yml
vendored
Normal file
10
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/bundle.yml
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
bundle:
|
||||
name: test
|
||||
|
||||
workspace:
|
||||
profile: test
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
foo:
|
||||
name: job foo
|
4
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml
vendored
Normal file
4
bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
resources:
|
||||
pipelines:
|
||||
foo:
|
||||
name: pipeline foo
|
|
@ -0,0 +1,13 @@
|
|||
bundle:
|
||||
name: test
|
||||
|
||||
workspace:
|
||||
profile: test
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
foo:
|
||||
name: job foo
|
||||
pipelines:
|
||||
foo:
|
||||
name: pipeline foo
|
|
@ -0,0 +1,13 @@
|
|||
bundle:
|
||||
name: test
|
||||
|
||||
workspace:
|
||||
profile: test
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
foo:
|
||||
name: job foo
|
||||
pipelines:
|
||||
foo:
|
||||
name: pipeline foo
|
|
@ -0,0 +1,10 @@
|
|||
bundle:
|
||||
name: test
|
||||
|
||||
workspace:
|
||||
profile: test
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
foo:
|
||||
name: job foo
|
|
@ -0,0 +1,4 @@
|
|||
resources:
|
||||
pipelines:
|
||||
foo:
|
||||
name: pipeline foo
|
|
@ -0,0 +1,5 @@
|
|||
bundle:
|
||||
name: test
|
||||
|
||||
workspace:
|
||||
profile: test
|
|
@ -0,0 +1,4 @@
|
|||
resources:
|
||||
jobs:
|
||||
foo:
|
||||
name: job foo
|
|
@ -0,0 +1,4 @@
|
|||
resources:
|
||||
pipelines:
|
||||
foo:
|
||||
name: pipeline foo
|
|
@ -0,0 +1,37 @@
|
|||
package config_tests
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/bricks/bundle"
|
||||
"github.com/databricks/bricks/bundle/config/mutator"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
|
||||
_, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
|
||||
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml")
|
||||
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath))
|
||||
}
|
||||
|
||||
func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
|
||||
b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
|
||||
require.NoError(t, err)
|
||||
err = bundle.Apply(context.Background(), b, mutator.DefaultMutators())
|
||||
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml")
|
||||
resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
|
||||
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath))
|
||||
}
|
||||
|
||||
func TestConflictingResourceIdsTwoSubconfigs(t *testing.T) {
|
||||
b, err := bundle.Load("./conflicting_resource_ids/two_subconfigurations")
|
||||
require.NoError(t, err)
|
||||
err = bundle.Apply(context.Background(), b, mutator.DefaultMutators())
|
||||
resources1ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources1.yml")
|
||||
resources2ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources2.yml")
|
||||
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", resources1ConfigPath, resources2ConfigPath))
|
||||
}
|
Loading…
Reference in New Issue