From a52b188e9952c5b99bb59589fc8469c71983fa26 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 29 Jul 2024 18:34:02 +0530 Subject: [PATCH] Use dynamic walking to validate unique resource keys (#1614) ## Changes This PR: 1. Uses dynamic walking (via the `dyn.MapByPattern` func) to validate no two resources have the same resource key. The allows us to remove this validation at merge time. 2. Modifies `dyn.Mapping` to always return a sorted slice of pairs. This makes traversal functions like `dyn.Walk` or `dyn.MapByPattern` deterministic. ## Tests Unit tests. Also manually. --- bundle/config/mutator/mutator.go | 5 + bundle/config/resources.go | 120 --------------- bundle/config/resources_test.go | 120 --------------- bundle/config/root.go | 11 -- bundle/config/root_test.go | 16 -- .../config/validate/unique_resource_keys.go | 116 +++++++++++++++ bundle/tests/conflicting_resource_ids_test.go | 42 ------ .../databricks.yml | 8 +- .../resources1.yml} | 4 + .../resources2.yml | 8 + .../databricks.yml | 2 +- .../resources.yml | 0 .../databricks.yml | 3 + .../resources.yml | 4 + .../databricks.yml | 0 .../resources1.yml | 0 .../resources2.yml | 0 .../databricks.yml | 18 +++ .../databricks.yml | 0 bundle/tests/validate_test.go | 139 ++++++++++++++++++ libs/dyn/mapping.go | 3 +- 21 files changed, 304 insertions(+), 315 deletions(-) create mode 100644 bundle/config/validate/unique_resource_keys.go delete mode 100644 bundle/tests/conflicting_resource_ids_test.go rename bundle/tests/{conflicting_resource_ids/no_subconfigurations => validate/duplicate_resource_name_in_multiple_locations}/databricks.yml (53%) rename bundle/tests/{conflicting_resource_ids/one_subconfiguration/resources.yml => validate/duplicate_resource_name_in_multiple_locations/resources1.yml} (59%) create mode 100644 bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources2.yml rename bundle/tests/{conflicting_resource_ids/one_subconfiguration => validate/duplicate_resource_name_in_subconfiguration}/databricks.yml (84%) rename bundle/{config/testdata => tests/validate}/duplicate_resource_name_in_subconfiguration/resources.yml (100%) rename bundle/{config/testdata/duplicate_resource_name_in_subconfiguration => tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job}/databricks.yml (76%) create mode 100644 bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml rename bundle/tests/{conflicting_resource_ids/two_subconfigurations => validate/duplicate_resource_names_in_different_subconfiguations}/databricks.yml (100%) rename bundle/tests/{conflicting_resource_ids/two_subconfigurations => validate/duplicate_resource_names_in_different_subconfiguations}/resources1.yml (100%) rename bundle/tests/{conflicting_resource_ids/two_subconfigurations => validate/duplicate_resource_names_in_different_subconfiguations}/resources2.yml (100%) create mode 100644 bundle/tests/validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml rename bundle/{config/testdata/duplicate_resource_names_in_root => tests/validate/duplicate_resource_names_in_root_job_and_pipeline}/databricks.yml (100%) create mode 100644 bundle/tests/validate_test.go diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 52f85eeb..0458beff 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -5,6 +5,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/loader" pythonmutator "github.com/databricks/cli/bundle/config/mutator/python" + "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/scripts" ) @@ -26,5 +27,9 @@ func DefaultMutators() []bundle.Mutator { DefineDefaultTarget(), LoadGitDetails(), pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoad), + + // Note: This mutator must run before the target overrides are merged. + // See the mutator for more details. + validate.UniqueResourceKeys(), } } diff --git a/bundle/config/resources.go b/bundle/config/resources.go index f70052ec..062e38ed 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -20,126 +20,6 @@ type Resources struct { QualityMonitors map[string]*resources.QualityMonitor `json:"quality_monitors,omitempty"` } -type UniqueResourceIdTracker struct { - Type map[string]string - ConfigPath map[string]string -} - -// verifies merging is safe by checking no duplicate identifiers exist -func (r *Resources) VerifySafeMerge(other *Resources) error { - rootTracker, err := r.VerifyUniqueResourceIdentifiers() - if err != nil { - return err - } - otherTracker, err := other.VerifyUniqueResourceIdentifiers() - if err != nil { - return err - } - for k := range otherTracker.Type { - if _, ok := rootTracker.Type[k]; ok { - return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - rootTracker.Type[k], - rootTracker.ConfigPath[k], - otherTracker.Type[k], - otherTracker.ConfigPath[k], - ) - } - } - return nil -} - -// This function verifies there are no duplicate names used for the resource definations -func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) { - tracker := &UniqueResourceIdTracker{ - Type: make(map[string]string), - ConfigPath: make(map[string]string), - } - for k := range r.Jobs { - tracker.Type[k] = "job" - tracker.ConfigPath[k] = r.Jobs[k].ConfigFilePath - } - for k := range r.Pipelines { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "pipeline", - r.Pipelines[k].ConfigFilePath, - ) - } - tracker.Type[k] = "pipeline" - tracker.ConfigPath[k] = r.Pipelines[k].ConfigFilePath - } - for k := range r.Models { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "mlflow_model", - r.Models[k].ConfigFilePath, - ) - } - tracker.Type[k] = "mlflow_model" - tracker.ConfigPath[k] = r.Models[k].ConfigFilePath - } - for k := range r.Experiments { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "mlflow_experiment", - r.Experiments[k].ConfigFilePath, - ) - } - tracker.Type[k] = "mlflow_experiment" - tracker.ConfigPath[k] = r.Experiments[k].ConfigFilePath - } - for k := range r.ModelServingEndpoints { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "model_serving_endpoint", - r.ModelServingEndpoints[k].ConfigFilePath, - ) - } - tracker.Type[k] = "model_serving_endpoint" - tracker.ConfigPath[k] = r.ModelServingEndpoints[k].ConfigFilePath - } - for k := range r.RegisteredModels { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "registered_model", - r.RegisteredModels[k].ConfigFilePath, - ) - } - tracker.Type[k] = "registered_model" - tracker.ConfigPath[k] = r.RegisteredModels[k].ConfigFilePath - } - for k := range r.QualityMonitors { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "quality_monitor", - r.QualityMonitors[k].ConfigFilePath, - ) - } - tracker.Type[k] = "quality_monitor" - tracker.ConfigPath[k] = r.QualityMonitors[k].ConfigFilePath - } - return tracker, nil -} - type resource struct { resource ConfigResource resource_type string diff --git a/bundle/config/resources_test.go b/bundle/config/resources_test.go index 7415029b..6860d73d 100644 --- a/bundle/config/resources_test.go +++ b/bundle/config/resources_test.go @@ -5,129 +5,9 @@ import ( "reflect" "testing" - "github.com/databricks/cli/bundle/config/paths" - "github.com/databricks/cli/bundle/config/resources" "github.com/stretchr/testify/assert" ) -func TestVerifyUniqueResourceIdentifiers(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - Experiments: map[string]*resources.MlflowExperiment{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - _, err := r.VerifyUniqueResourceIdentifiers() - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, mlflow_experiment at foo2.yml)") -} - -func TestVerifySafeMerge(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - Pipelines: map[string]*resources.Pipeline{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, pipeline at foo2.yml)") -} - -func TestVerifySafeMergeForSameResourceType(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, job at foo2.yml)") -} - -func TestVerifySafeMergeForRegisteredModels(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - RegisteredModels: map[string]*resources.RegisteredModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - RegisteredModels: map[string]*resources.RegisteredModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named bar (registered_model at bar.yml, registered_model at bar2.yml)") -} - // This test ensures that all resources have a custom marshaller and unmarshaller. // This is required because DABs resources map to Databricks APIs, and they do so // by embedding the corresponding Go SDK structs. diff --git a/bundle/config/root.go b/bundle/config/root.go index 4239a34d..cace2215 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -100,11 +100,6 @@ func LoadFromBytes(path string, raw []byte) (*Root, diag.Diagnostics) { if err != nil { return nil, diag.Errorf("failed to load %s: %v", path, err) } - - _, err = r.Resources.VerifyUniqueResourceIdentifiers() - if err != nil { - diags = diags.Extend(diag.FromErr(err)) - } return &r, diags } @@ -281,12 +276,6 @@ func (r *Root) InitializeVariables(vars []string) error { } func (r *Root) Merge(other *Root) error { - // Check for safe merge, protecting against duplicate resource identifiers - err := r.Resources.VerifySafeMerge(&other.Resources) - if err != nil { - return err - } - // Merge dynamic configuration values. return r.Mutate(func(root dyn.Value) (dyn.Value, error) { return merge.Merge(root, other.value) diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index aed670d6..c95e6e86 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -30,22 +30,6 @@ func TestRootLoad(t *testing.T) { assert.Equal(t, "basic", root.Bundle.Name) } -func TestDuplicateIdOnLoadReturnsError(t *testing.T) { - _, diags := Load("./testdata/duplicate_resource_names_in_root/databricks.yml") - assert.ErrorContains(t, diags.Error(), "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)") -} - -func TestDuplicateIdOnMergeReturnsError(t *testing.T) { - root, diags := Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml") - require.NoError(t, diags.Error()) - - other, diags := Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml") - require.NoError(t, diags.Error()) - - err := root.Merge(other) - assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)") -} - func TestInitializeVariables(t *testing.T) { fooDefault := "abc" root := &Root{ diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go new file mode 100644 index 00000000..d6212b0a --- /dev/null +++ b/bundle/config/validate/unique_resource_keys.go @@ -0,0 +1,116 @@ +package validate + +import ( + "context" + "fmt" + "slices" + "sort" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +// This mutator validates that: +// +// 1. Each resource key is unique across different resource types. No two resources +// of the same type can have the same key. This is because command like "bundle run" +// rely on the resource key to identify the resource to run. +// Eg: jobs.foo and pipelines.foo are not allowed simultaneously. +// +// 2. Each resource definition is contained within a single file, and is not spread +// across multiple files. Note: This is not applicable to resource configuration +// defined in a target override. That is why this mutator MUST run before the target +// overrides are merged. +func UniqueResourceKeys() bundle.Mutator { + return &uniqueResourceKeys{} +} + +type uniqueResourceKeys struct{} + +func (m *uniqueResourceKeys) Name() string { + return "validate:unique_resource_keys" +} + +func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + type metadata struct { + locations []dyn.Location + paths []dyn.Path + } + + // Maps of resource key to the paths and locations the resource is defined at. + resourceMetadata := map[string]*metadata{} + + rv := b.Config.Value().Get("resources") + + // return early if no resources are defined or the resources block is empty. + if rv.Kind() == dyn.KindInvalid || rv.Kind() == dyn.KindNil { + return diags + } + + // Gather the paths and locations of all resources. + _, err := dyn.MapByPattern( + rv, + dyn.NewPattern(dyn.AnyKey(), dyn.AnyKey()), + func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // The key for the resource. Eg: "my_job" for jobs.my_job. + k := p[1].Key() + + m, ok := resourceMetadata[k] + if !ok { + m = &metadata{ + paths: []dyn.Path{}, + locations: []dyn.Location{}, + } + } + + // dyn.Path under the hood is a slice. The code that walks the configuration + // tree uses the same underlying slice to track the path as it walks + // the tree. So, we need to clone it here. + m.paths = append(m.paths, slices.Clone(p)) + m.locations = append(m.locations, v.Locations()...) + + resourceMetadata[k] = m + return v, nil + }, + ) + if err != nil { + return diag.FromErr(err) + } + + for k, v := range resourceMetadata { + if len(v.locations) <= 1 { + continue + } + + // Sort the locations and paths for consistent error messages. This helps + // with unit testing. + sort.Slice(v.locations, func(i, j int) bool { + l1 := v.locations[i] + l2 := v.locations[j] + + if l1.File != l2.File { + return l1.File < l2.File + } + if l1.Line != l2.Line { + return l1.Line < l2.Line + } + return l1.Column < l2.Column + }) + sort.Slice(v.paths, func(i, j int) bool { + return v.paths[i].String() < v.paths[j].String() + }) + + // If there are multiple resources with the same key, report an error. + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("multiple resources have been defined with the same key: %s", k), + Locations: v.locations, + Paths: v.paths, + }) + } + + return diags +} diff --git a/bundle/tests/conflicting_resource_ids_test.go b/bundle/tests/conflicting_resource_ids_test.go deleted file mode 100644 index e7f0aa28..00000000 --- a/bundle/tests/conflicting_resource_ids_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package config_tests - -import ( - "context" - "fmt" - "path/filepath" - "testing" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestConflictingResourceIdsNoSubconfig(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./conflicting_resource_ids/no_subconfigurations") - require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.Load()) - bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/databricks.yml") - assert.ErrorContains(t, diags.Error(), fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath)) -} - -func TestConflictingResourceIdsOneSubconfig(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./conflicting_resource_ids/one_subconfiguration") - require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.Load()) - bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/databricks.yml") - resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml") - assert.ErrorContains(t, diags.Error(), fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath)) -} - -func TestConflictingResourceIdsTwoSubconfigs(t *testing.T) { - ctx := context.Background() - b, err := bundle.Load(ctx, "./conflicting_resource_ids/two_subconfigurations") - require.NoError(t, err) - diags := bundle.Apply(ctx, b, phases.Load()) - resources1ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources1.yml") - resources2ConfigPath := filepath.FromSlash("conflicting_resource_ids/two_subconfigurations/resources2.yml") - assert.ErrorContains(t, diags.Error(), fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", resources1ConfigPath, resources2ConfigPath)) -} diff --git a/bundle/tests/conflicting_resource_ids/no_subconfigurations/databricks.yml b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/databricks.yml similarity index 53% rename from bundle/tests/conflicting_resource_ids/no_subconfigurations/databricks.yml rename to bundle/tests/validate/duplicate_resource_name_in_multiple_locations/databricks.yml index 1e9aa10b..ebb1f900 100644 --- a/bundle/tests/conflicting_resource_ids/no_subconfigurations/databricks.yml +++ b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/databricks.yml @@ -4,10 +4,10 @@ bundle: workspace: profile: test +include: + - ./*.yml + resources: jobs: foo: - name: job foo - pipelines: - foo: - name: pipeline foo + name: job foo 1 diff --git a/bundle/tests/conflicting_resource_ids/one_subconfiguration/resources.yml b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources1.yml similarity index 59% rename from bundle/tests/conflicting_resource_ids/one_subconfiguration/resources.yml rename to bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources1.yml index c3dcb6e2..deb81caa 100644 --- a/bundle/tests/conflicting_resource_ids/one_subconfiguration/resources.yml +++ b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources1.yml @@ -1,4 +1,8 @@ resources: + jobs: + foo: + name: job foo 2 + pipelines: foo: name: pipeline foo diff --git a/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources2.yml b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources2.yml new file mode 100644 index 00000000..4e0a342b --- /dev/null +++ b/bundle/tests/validate/duplicate_resource_name_in_multiple_locations/resources2.yml @@ -0,0 +1,8 @@ +resources: + jobs: + foo: + name: job foo 3 + + experiments: + foo: + name: experiment foo diff --git a/bundle/tests/conflicting_resource_ids/one_subconfiguration/databricks.yml b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration/databricks.yml similarity index 84% rename from bundle/tests/conflicting_resource_ids/one_subconfiguration/databricks.yml rename to bundle/tests/validate/duplicate_resource_name_in_subconfiguration/databricks.yml index ea4dec2e..5bec6748 100644 --- a/bundle/tests/conflicting_resource_ids/one_subconfiguration/databricks.yml +++ b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration/databricks.yml @@ -5,7 +5,7 @@ workspace: profile: test include: - - "*.yml" + - ./resources.yml resources: jobs: diff --git a/bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration/resources.yml similarity index 100% rename from bundle/config/testdata/duplicate_resource_name_in_subconfiguration/resources.yml rename to bundle/tests/validate/duplicate_resource_name_in_subconfiguration/resources.yml diff --git a/bundle/config/testdata/duplicate_resource_name_in_subconfiguration/databricks.yml b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml similarity index 76% rename from bundle/config/testdata/duplicate_resource_name_in_subconfiguration/databricks.yml rename to bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml index a8160292..5bec6748 100644 --- a/bundle/config/testdata/duplicate_resource_name_in_subconfiguration/databricks.yml +++ b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml @@ -4,6 +4,9 @@ bundle: workspace: profile: test +include: + - ./resources.yml + resources: jobs: foo: diff --git a/bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml new file mode 100644 index 00000000..83fb7573 --- /dev/null +++ b/bundle/tests/validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml @@ -0,0 +1,4 @@ +resources: + jobs: + foo: + name: job foo 2 diff --git a/bundle/tests/conflicting_resource_ids/two_subconfigurations/databricks.yml b/bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/databricks.yml similarity index 100% rename from bundle/tests/conflicting_resource_ids/two_subconfigurations/databricks.yml rename to bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/databricks.yml diff --git a/bundle/tests/conflicting_resource_ids/two_subconfigurations/resources1.yml b/bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/resources1.yml similarity index 100% rename from bundle/tests/conflicting_resource_ids/two_subconfigurations/resources1.yml rename to bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/resources1.yml diff --git a/bundle/tests/conflicting_resource_ids/two_subconfigurations/resources2.yml b/bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/resources2.yml similarity index 100% rename from bundle/tests/conflicting_resource_ids/two_subconfigurations/resources2.yml rename to bundle/tests/validate/duplicate_resource_names_in_different_subconfiguations/resources2.yml diff --git a/bundle/tests/validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml b/bundle/tests/validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml new file mode 100644 index 00000000..d286f104 --- /dev/null +++ b/bundle/tests/validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml @@ -0,0 +1,18 @@ +bundle: + name: test + +workspace: + profile: test + +resources: + jobs: + foo: + name: job foo + bar: + name: job bar + pipelines: + baz: + name: pipeline baz + experiments: + foo: + name: experiment foo diff --git a/bundle/config/testdata/duplicate_resource_names_in_root/databricks.yml b/bundle/tests/validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml similarity index 100% rename from bundle/config/testdata/duplicate_resource_names_in_root/databricks.yml rename to bundle/tests/validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml diff --git a/bundle/tests/validate_test.go b/bundle/tests/validate_test.go new file mode 100644 index 00000000..9cd7c201 --- /dev/null +++ b/bundle/tests/validate_test.go @@ -0,0 +1,139 @@ +package config_tests + +import ( + "context" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateUniqueResourceIdentifiers(t *testing.T) { + tcases := []struct { + name string + diagnostics diag.Diagnostics + }{ + { + name: "duplicate_resource_names_in_root_job_and_pipeline", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml"), Line: 10, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml"), Line: 13, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("jobs.foo"), + dyn.MustPathFromString("pipelines.foo"), + }, + }, + }, + }, + { + name: "duplicate_resource_names_in_root_job_and_experiment", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml"), Line: 10, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml"), Line: 18, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("experiments.foo"), + dyn.MustPathFromString("jobs.foo"), + }, + }, + }, + }, + { + name: "duplicate_resource_name_in_subconfiguration", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_name_in_subconfiguration/databricks.yml"), Line: 13, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_subconfiguration/resources.yml"), Line: 4, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("jobs.foo"), + dyn.MustPathFromString("pipelines.foo"), + }, + }, + }, + }, + { + name: "duplicate_resource_name_in_subconfiguration_job_and_job", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml"), Line: 13, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml"), Line: 4, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("jobs.foo"), + }, + }, + }, + }, + { + name: "duplicate_resource_names_in_different_subconfiguations", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_names_in_different_subconfiguations/resources1.yml"), Line: 4, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_names_in_different_subconfiguations/resources2.yml"), Line: 4, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("jobs.foo"), + dyn.MustPathFromString("pipelines.foo"), + }, + }, + }, + }, + { + name: "duplicate_resource_name_in_multiple_locations", + diagnostics: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "multiple resources have been defined with the same key: foo", + Locations: []dyn.Location{ + {File: filepath.FromSlash("validate/duplicate_resource_name_in_multiple_locations/databricks.yml"), Line: 13, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_multiple_locations/resources1.yml"), Line: 4, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_multiple_locations/resources1.yml"), Line: 8, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_multiple_locations/resources2.yml"), Line: 4, Column: 7}, + {File: filepath.FromSlash("validate/duplicate_resource_name_in_multiple_locations/resources2.yml"), Line: 8, Column: 7}, + }, + Paths: []dyn.Path{ + dyn.MustPathFromString("experiments.foo"), + dyn.MustPathFromString("jobs.foo"), + dyn.MustPathFromString("pipelines.foo"), + }, + }, + }, + }, + } + + for _, tc := range tcases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + b, err := bundle.Load(ctx, "./validate/"+tc.name) + require.NoError(t, err) + + // The UniqueResourceKeys mutator is run as part of the Load phase. + diags := bundle.Apply(ctx, b, phases.Load()) + assert.Equal(t, tc.diagnostics, diags) + }) + } +} diff --git a/libs/dyn/mapping.go b/libs/dyn/mapping.go index 668f57ec..f9f2d2e9 100644 --- a/libs/dyn/mapping.go +++ b/libs/dyn/mapping.go @@ -46,7 +46,8 @@ func newMappingFromGoMap(vin map[string]Value) Mapping { return m } -// Pairs returns all the key-value pairs in the Mapping. +// Pairs returns all the key-value pairs in the Mapping. The pairs are sorted by +// their key in lexicographic order. func (m Mapping) Pairs() []Pair { return m.pairs }