diff --git a/bundle/config/mutator/validate_unique_resource_keys.go b/bundle/config/mutator/validate_unique_resource_keys.go index 6698b822..d3cb5446 100644 --- a/bundle/config/mutator/validate_unique_resource_keys.go +++ b/bundle/config/mutator/validate_unique_resource_keys.go @@ -21,6 +21,9 @@ func (m *validateUniqueResourceKeys) Name() string { return "ValidateUniqueResourceKeys" } +// TODO: Make this a readonly mutator. +// TODO: Make this a bit terser. + // TODO: Ensure all duplicate key sites are returned to the user in the diagnostics. func (m *validateUniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { diags := diag.Diagnostics{} @@ -30,7 +33,7 @@ func (m *validateUniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle rv := v.Get("resources") // Walk the resources tree and accumulate the resource identifiers. - return dyn.Walk(rv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + _, err := dyn.Walk(rv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { // The path is expected to be of length 2, and of the form .. // Eg: jobs.my_job, pipelines.my_pipeline, etc. if len(p) < 2 { @@ -56,6 +59,7 @@ func (m *validateUniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle diags = append(diags, diag.Diagnostic{ Severity: diag.Error, Summary: fmt.Sprintf("multiple resources named %s (%s at %s, %s at %s)", k, paths[k].String(), nl, p.String(), ol), + Location: nl, }) } @@ -63,6 +67,7 @@ func (m *validateUniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle paths[k] = p return v, nil }) + return v, err }) if err != nil { diags = append(diags, diag.FromErr(err)...) diff --git a/bundle/config/root.go b/bundle/config/root.go index 8bebf648..1447dd35 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -96,12 +96,6 @@ func Load(path string) (*Root, diag.Diagnostics) { if err != nil { return nil, diag.Errorf("failed to load %s: %v", path, err) } - - // Error if there are duplicate resource identifiers in the config file. - _, err = r.gatherResourceIdentifiers() - if err != nil { - diags = diags.Extend(diag.FromErr(err)) - } return &r, diags } @@ -266,10 +260,10 @@ func (r *Root) InitializeVariables(vars []string) error { func (r *Root) Merge(other *Root) error { // Check for safe merge, protecting against duplicate resource identifiers - err := r.verifySafeMerge(*other) - if err != nil { - return err - } + // err := r.verifySafeMerge(*other) + // if err != nil { + // return err + // } // Merge dynamic configuration values. return r.Mutate(func(root dyn.Value) (dyn.Value, error) { @@ -465,81 +459,87 @@ func (r Root) GetLocation(path string) dyn.Location { return v.Location() } -// This function verifies that the merge of two Root objects is safe. It checks -// there will be no duplicate resource identifiers in the merged configuration. -func (r Root) verifySafeMerge(other Root) error { - paths, err := r.gatherResourceIdentifiers() - if err != nil { - return err - } - - otherPaths, err := other.gatherResourceIdentifiers() - if err != nil { - return err - } - - // If duplicate keys exist, return - for k, p := range paths { - if _, ok := otherPaths[k]; ok { - // Type and location of the existing resource in the map. - ot := strings.TrimSuffix(p[0].Key(), "s") - ov, _ := dyn.GetByPath(r.value.Get("resources"), p) - ol := ov.Location() - - // Type and location of the newly encountered resource with a duplicate name. - nt := strings.TrimSuffix(otherPaths[k][0].Key(), "s") - nv, _ := dyn.GetByPath(other.value.Get("resources"), otherPaths[k]) - nl := nv.Location() - - // Error, encountered a duplicate resource identifier. - return fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) - } - } - return nil +// Get the dynamic value of the configuration at the specified path for read-only access. +// Use the Mutate method to modify the configuration. +func (r Root) ReadOnlyValue() dyn.Value { + return r.value } -// This function gathers the resource identifiers, which exist in the bundle configuration -// in the form: resources... -// -// It returns an error if it encounters duplicate resource identifiers. -// -// Otherwise it returns a map of resource identifiers to their paths in the configuration tree, -// relative to the resources key. -func (r Root) gatherResourceIdentifiers() (map[string]dyn.Path, error) { - paths := make(map[string]dyn.Path) - rrv := r.value.Get("resources") +// // This function verifies that the merge of two Root objects is safe. It checks +// // there will be no duplicate resource identifiers in the merged configuration. +// func (r Root) verifySafeMerge(other Root) error { +// paths, err := r.gatherResourceIdentifiers() +// if err != nil { +// return err +// } - // Walk the resources tree and accumulate the resource identifiers. - _, err := dyn.Walk(rrv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - // The path is expected to be of length 2, and of the form .. - // Eg: jobs.my_job, pipelines.my_pipeline, etc. - if len(p) < 2 { - return v, nil - } - if len(p) > 2 { - return v, dyn.ErrSkip - } +// otherPaths, err := other.gatherResourceIdentifiers() +// if err != nil { +// return err +// } - // If the resource identifier already exists in the map, return an error. - k := p[1].Key() - if _, ok := paths[k]; ok { - // Type and location of the existing resource in the map. - ot := strings.TrimSuffix(paths[k][0].Key(), "s") - ov, _ := dyn.GetByPath(rrv, paths[k]) - ol := ov.Location() +// // If duplicate keys exist, return +// for k, p := range paths { +// if _, ok := otherPaths[k]; ok { +// // Type and location of the existing resource in the map. +// ot := strings.TrimSuffix(p[0].Key(), "s") +// ov, _ := dyn.GetByPath(r.value.Get("resources"), p) +// ol := ov.Location() - // Type and location of the newly encountered with a duplicate name. - nt := strings.TrimSuffix(p[0].Key(), "s") - nv, _ := dyn.GetByPath(rrv, p) - nl := nv.Location() +// // Type and location of the newly encountered resource with a duplicate name. +// nt := strings.TrimSuffix(otherPaths[k][0].Key(), "s") +// nv, _ := dyn.GetByPath(other.value.Get("resources"), otherPaths[k]) +// nl := nv.Location() - // Error, encountered a duplicate resource identifier. - return v, fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) - } +// // Error, encountered a duplicate resource identifier. +// return fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) +// } +// } +// return nil +// } - // Accumulate the resource identifier and its path. - paths[k] = p - return v, nil - }) - return paths, err -} +// // This function gathers the resource identifiers, which exist in the bundle configuration +// // in the form: resources... +// // +// // It returns an error if it encounters duplicate resource identifiers. +// // +// // Otherwise it returns a map of resource identifiers to their paths in the configuration tree, +// // relative to the resources key. +// func (r Root) gatherResourceIdentifiers() (map[string]dyn.Path, error) { +// paths := make(map[string]dyn.Path) +// rrv := r.value.Get("resources") + +// // Walk the resources tree and accumulate the resource identifiers. +// _, err := dyn.Walk(rrv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { +// // The path is expected to be of length 2, and of the form .. +// // Eg: jobs.my_job, pipelines.my_pipeline, etc. +// if len(p) < 2 { +// return v, nil +// } +// if len(p) > 2 { +// return v, dyn.ErrSkip +// } + +// // If the resource identifier already exists in the map, return an error. +// k := p[1].Key() +// if _, ok := paths[k]; ok { +// // Type and location of the existing resource in the map. +// ot := strings.TrimSuffix(paths[k][0].Key(), "s") +// ov, _ := dyn.GetByPath(rrv, paths[k]) +// ol := ov.Location() + +// // Type and location of the newly encountered with a duplicate name. +// nt := strings.TrimSuffix(p[0].Key(), "s") +// nv, _ := dyn.GetByPath(rrv, p) +// nl := nv.Location() + +// // Error, encountered a duplicate resource identifier. +// return v, fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) +// } + +// // Accumulate the resource identifier and its path. +// paths[k] = p +// return v, nil +// }) +// return paths, err +// } diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index 6b2b2ba0..77e067f0 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -1,12 +1,13 @@ package config import ( + "context" "encoding/json" "reflect" "testing" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config/variable" - "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -32,7 +33,11 @@ func TestRootLoad(t *testing.T) { } func TestDuplicateIdOnLoadReturnsErrorForJobAndPipeline(t *testing.T) { - _, diags := Load("./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml") + b, diags := Load("./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml") + assert.NoError(t, diags.Error()) + + diags = bundle.Apply(context.Background(), b, validate.PreInitialize()) + assert.ErrorContains(t, diags.Error(), "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:10:7, pipeline at ./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:13:7)") } @@ -63,24 +68,6 @@ func TestDuplicateIdOnMergeReturnsErrorForJobAndJob(t *testing.T) { assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml:10:7, job at ./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml:4:7)") } -func TestGatherResourceIdentifiers(t *testing.T) { - root, diags := Load("./testdata/gather_resource_identifiers/databricks.yml") - require.NoError(t, diags.Error()) - - actual, err := root.gatherResourceIdentifiers() - assert.NoError(t, err) - - expected := map[string]dyn.Path{ - "foo": dyn.MustPathFromString("jobs.foo"), - "bar": dyn.MustPathFromString("jobs.bar"), - "zab": dyn.MustPathFromString("pipelines.zab"), - "baz": dyn.MustPathFromString("pipelines.baz"), - "zaz": dyn.MustPathFromString("experiments.zaz"), - "zuz": dyn.MustPathFromString("experiments.zuz"), - } - assert.Equal(t, expected, actual) -} - func TestInitializeVariables(t *testing.T) { fooDefault := "abc" root := &Root{ diff --git a/bundle/config/validate/pre_initialize.go b/bundle/config/validate/pre_initialize.go new file mode 100644 index 00000000..d18050b8 --- /dev/null +++ b/bundle/config/validate/pre_initialize.go @@ -0,0 +1,28 @@ +package validate + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" +) + +type preInitialize struct{} + +// Apply implements bundle.Mutator. +func (v *preInitialize) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + return bundle.ApplyReadOnly(ctx, bundle.ReadOnly(b), bundle.Parallel( + UniqueResourceKeys(), + )) +} + +// Name implements bundle.Mutator. +func (v *preInitialize) Name() string { + return "validate:pre_initialize" +} + +// Validations to perform before initialization of the bundle. These validations +// are thus applied for most bundle commands. +func PreInitialize() bundle.Mutator { + return &preInitialize{} +} diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go new file mode 100644 index 00000000..86df0988 --- /dev/null +++ b/bundle/config/validate/unique_resource_keys.go @@ -0,0 +1,69 @@ +package validate + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +// TODO: Ensure the solution here works for dup keys in the same resource type. + +func UniqueResourceKeys() bundle.ReadOnlyMutator { + return &uniqueResourceKeys{} +} + +type uniqueResourceKeys struct{} + +func (m *uniqueResourceKeys) Name() string { + return "validate:unique_resource_keys" +} + +// TODO: Ensure all duplicate key sites are returned to the user in the diagnostics. +func (m *uniqueResourceKeys) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + paths := make(map[string]dyn.Path) + rv := rb.Config().ReadOnlyValue().Get("resources") + + // Walk the resources tree and accumulate the resource identifiers. + _, err := dyn.Walk(rv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // The path is expected to be of length 2, and of the form .. + // Eg: jobs.my_job, pipelines.my_pipeline, etc. + if len(p) < 2 { + return v, nil + } + if len(p) > 2 { + return v, dyn.ErrSkip + } + + // If the resource identifier already exists in the map, return an error. + k := p[1].Key() + if _, ok := paths[k]; ok { + // Value of the resource that's already been seen + ov, _ := dyn.GetByPath(rv, paths[k]) + + // Value of the newly encountered resource with a duplicate identifier. + nv, _ := dyn.GetByPath(rv, p) + + // Error, encountered a duplicate resource identifier. + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("multiple resources named %s (%s at %s, %s at %s)", k, paths[k].String(), ov.Location(), p.String(), nv.Location()), + Location: nv.Location(), + }) + } + + // Accumulate the resource identifier and its path. + paths[k] = p + return v, nil + }) + + if err != nil { + diags = append(diags, diag.FromErr(err)...) + } + + return diags +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index d15c2323..0710c955 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -4,6 +4,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/config/validate" "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/permissions" @@ -17,7 +18,7 @@ func Initialize() bundle.Mutator { return newPhase( "initialize", []bundle.Mutator{ - mutator.ValidateUniqueResourceKeys(), + validate.PreInitialize(), mutator.RewriteSyncPaths(), mutator.MergeJobClusters(), mutator.MergeJobTasks(),