From 557223e165b97ee780c2422517f10c095a9d76e8 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 2 May 2024 16:16:17 +0200 Subject: [PATCH] - --- bundle/config/resources.go | 108 --------------- bundle/config/resources/schema.go | 25 ---- bundle/config/resources_test.go | 127 ------------------ bundle/config/root.go | 86 +++++++++++- bundle/config/root_test.go | 26 +++- .../databricks.yml | 10 ++ .../resources.yml | 4 + .../databricks.yml | 18 +++ .../databricks.yml | 4 + 9 files changed, 141 insertions(+), 267 deletions(-) delete mode 100644 bundle/config/resources/schema.go delete mode 100644 bundle/config/resources_test.go create mode 100644 bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml create mode 100644 bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml create mode 100644 bundle/config/testdata/duplicate_resource_names_in_root_job_and_experiment/databricks.yml rename bundle/config/testdata/{duplicate_resource_names_in_root => duplicate_resource_names_in_root_job_and_pipeline}/databricks.yml (69%) diff --git a/bundle/config/resources.go b/bundle/config/resources.go index f79a9714..aa945df4 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -17,114 +17,6 @@ type Resources struct { Experiments map[string]*resources.MlflowExperiment `json:"experiments,omitempty"` ModelServingEndpoints map[string]*resources.ModelServingEndpoint `json:"model_serving_endpoints,omitempty"` RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"` - Schemas map[string]*resources.Schema `json:"schemas,omitempty"` -} - -type UniqueResourceIdTracker struct { - Type map[string]string - ConfigPath map[string]string -} - -// verifies merging is safe by checking no duplicate identifiers exist -func (r *Resources) VerifySafeMerge(other *Resources) error { - rootTracker, err := r.VerifyUniqueResourceIdentifiers() - if err != nil { - return err - } - otherTracker, err := other.VerifyUniqueResourceIdentifiers() - if err != nil { - return err - } - for k := range otherTracker.Type { - if _, ok := rootTracker.Type[k]; ok { - return fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - rootTracker.Type[k], - rootTracker.ConfigPath[k], - otherTracker.Type[k], - otherTracker.ConfigPath[k], - ) - } - } - return nil -} - -// This function verifies there are no duplicate names used for the resource definations -func (r *Resources) VerifyUniqueResourceIdentifiers() (*UniqueResourceIdTracker, error) { - tracker := &UniqueResourceIdTracker{ - Type: make(map[string]string), - ConfigPath: make(map[string]string), - } - for k := range r.Jobs { - tracker.Type[k] = "job" - tracker.ConfigPath[k] = r.Jobs[k].ConfigFilePath - } - for k := range r.Pipelines { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "pipeline", - r.Pipelines[k].ConfigFilePath, - ) - } - tracker.Type[k] = "pipeline" - tracker.ConfigPath[k] = r.Pipelines[k].ConfigFilePath - } - for k := range r.Models { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "mlflow_model", - r.Models[k].ConfigFilePath, - ) - } - tracker.Type[k] = "mlflow_model" - tracker.ConfigPath[k] = r.Models[k].ConfigFilePath - } - for k := range r.Experiments { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "mlflow_experiment", - r.Experiments[k].ConfigFilePath, - ) - } - tracker.Type[k] = "mlflow_experiment" - tracker.ConfigPath[k] = r.Experiments[k].ConfigFilePath - } - for k := range r.ModelServingEndpoints { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "model_serving_endpoint", - r.ModelServingEndpoints[k].ConfigFilePath, - ) - } - tracker.Type[k] = "model_serving_endpoint" - tracker.ConfigPath[k] = r.ModelServingEndpoints[k].ConfigFilePath - } - for k := range r.RegisteredModels { - if _, ok := tracker.Type[k]; ok { - return tracker, fmt.Errorf("multiple resources named %s (%s at %s, %s at %s)", - k, - tracker.Type[k], - tracker.ConfigPath[k], - "registered_model", - r.RegisteredModels[k].ConfigFilePath, - ) - } - tracker.Type[k] = "registered_model" - tracker.ConfigPath[k] = r.RegisteredModels[k].ConfigFilePath - } - return tracker, nil } // ConfigureConfigFilePath sets the specified path for all resources contained in this instance. diff --git a/bundle/config/resources/schema.go b/bundle/config/resources/schema.go deleted file mode 100644 index d8f50a59..00000000 --- a/bundle/config/resources/schema.go +++ /dev/null @@ -1,25 +0,0 @@ -package resources - -import ( - "github.com/databricks/cli/bundle/config/paths" - "github.com/databricks/databricks-sdk-go/service/catalog" -) - -type Schema struct { - // List of grants to apply on this schema. - Grants []Grant `json:"grants,omitempty"` - - // This represents the id which is the full name of the schema - // (catalog_name.schema_name) that can be used - // as a reference in other resources. This value is returned by terraform. - // TODO: verify the accuracy of this comment - ID string `json:"id,omitempty" bundle:"readonly"` - - // Path to config file where the resource is defined. All bundle resources - // include this for interpolation purposes. - paths.Paths - - *catalog.CreateSchema - - ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` -} diff --git a/bundle/config/resources_test.go b/bundle/config/resources_test.go deleted file mode 100644 index 9c4104e4..00000000 --- a/bundle/config/resources_test.go +++ /dev/null @@ -1,127 +0,0 @@ -package config - -import ( - "testing" - - "github.com/databricks/cli/bundle/config/paths" - "github.com/databricks/cli/bundle/config/resources" - "github.com/stretchr/testify/assert" -) - -func TestVerifyUniqueResourceIdentifiers(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - Experiments: map[string]*resources.MlflowExperiment{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - _, err := r.VerifyUniqueResourceIdentifiers() - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, mlflow_experiment at foo2.yml)") -} - -func TestVerifySafeMerge(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - Pipelines: map[string]*resources.Pipeline{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, pipeline at foo2.yml)") -} - -func TestVerifySafeMergeForSameResourceType(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - Models: map[string]*resources.MlflowModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named foo (job at foo.yml, job at foo2.yml)") -} - -func TestVerifySafeMergeForRegisteredModels(t *testing.T) { - r := Resources{ - Jobs: map[string]*resources.Job{ - "foo": { - Paths: paths.Paths{ - ConfigFilePath: "foo.yml", - }, - }, - }, - RegisteredModels: map[string]*resources.RegisteredModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar.yml", - }, - }, - }, - } - other := Resources{ - RegisteredModels: map[string]*resources.RegisteredModel{ - "bar": { - Paths: paths.Paths{ - ConfigFilePath: "bar2.yml", - }, - }, - }, - } - err := r.VerifySafeMerge(&other) - assert.ErrorContains(t, err, "multiple resources named bar (registered_model at bar.yml, registered_model at bar2.yml)") -} diff --git a/bundle/config/root.go b/bundle/config/root.go index fda3759d..6a6daf46 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -97,7 +97,8 @@ func Load(path string) (*Root, diag.Diagnostics) { return nil, diag.Errorf("failed to load %s: %v", path, err) } - _, err = r.Resources.VerifyUniqueResourceIdentifiers() + // Error if there are duplicate resource identifiers in the config file. + _, err = r.gatherResourceIdentifiers() if err != nil { diags = diags.Extend(diag.FromErr(err)) } @@ -265,7 +266,7 @@ func (r *Root) InitializeVariables(vars []string) error { func (r *Root) Merge(other *Root) error { // Check for safe merge, protecting against duplicate resource identifiers - err := r.Resources.VerifySafeMerge(&other.Resources) + err := r.verifySafeMerge(*other) if err != nil { return err } @@ -463,3 +464,84 @@ func (r Root) GetLocation(path string) dyn.Location { } return v.Location() } + +// This function verifies that the merge of two Root objects is safe. It checks +// there will be no duplicate resource identifiers in the merged configuration. +func (r Root) verifySafeMerge(other Root) error { + paths, err := r.gatherResourceIdentifiers() + if err != nil { + return err + } + + otherPaths, err := other.gatherResourceIdentifiers() + if err != nil { + return err + } + + // If duplicate keys exist, return + for k, p := range paths { + if _, ok := otherPaths[k]; ok { + // Type and location of the existing resource in the map. + ot := strings.TrimSuffix(paths[k][0].Key(), "s") + ov, _ := dyn.GetByPath(r.value.Get("resources"), paths[k]) + ol := ov.Location() + + // Type and location of the newly encountered resource with a duplicate name. + nt := strings.TrimSuffix(p[0].Key(), "s") + nv, _ := dyn.GetByPath(r.value.Get("resources"), p) + nl := nv.Location() + + // Error, encountered a duplicate resource identifier. + return fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) + } + } + return nil +} + +// This function gathers the resource identifiers, which exist in the bundle configuration +// in the form: resources... +// +// It returns an error if it encounters a duplicate resource identifiers. +// +// Otherwise it returns a map of resource identifiers to their paths in the configuration tree +// relative to the resources key. +func (r Root) gatherResourceIdentifiers() (map[string]dyn.Path, error) { + paths := make(map[string]dyn.Path) + rrv := r.value.Get("resources") + + // Walk the resources tree and accumulate the resource identifiers. + _, err := dyn.Walk(rrv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // The path is expected to be of length 2, and of the form .. + // Eg: jobs.my_job, pipelines.my_pipeline, etc. + if len(p) < 2 { + return v, nil + } + if len(p) > 2 { + return v, dyn.ErrSkip + } + + // TODO: Add validation that the resource is a map. + + // If the resource identifier already exists in the map, return an error. + k := p[1].Key() + if _, ok := paths[k]; ok { + // Type and location of the existing resource in the map. + ot := strings.TrimSuffix(paths[k][0].Key(), "s") + ov, _ := dyn.GetByPath(rrv, paths[k]) + ol := ov.Location() + + // Type and location of the newly encountered with a duplicate name. + nt := strings.TrimSuffix(p[0].Key(), "s") + nv, _ := dyn.GetByPath(rrv, p) + nl := nv.Location() + + // Error, encountered a duplicate resource identifier. + return v, fmt.Errorf("multiple resources named %s (%s at %v, %s at %v)", k, ot, ol, nt, nl) + } + + // Accumulate the resource identifier and its path. + paths[k] = p + return v, nil + }) + return paths, err +} diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index b5676884..a6a1cdce 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -30,12 +30,17 @@ func TestRootLoad(t *testing.T) { assert.Equal(t, "basic", root.Bundle.Name) } -func TestDuplicateIdOnLoadReturnsError(t *testing.T) { - _, diags := Load("./testdata/duplicate_resource_names_in_root/databricks.yml") - assert.ErrorContains(t, diags.Error(), "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)") +func TestDuplicateIdOnLoadReturnsErrorForJobAndPipeline(t *testing.T) { + _, diags := Load("./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml") + assert.ErrorContains(t, diags.Error(), "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:10:7, pipeline at ./testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:15:7)") } -func TestDuplicateIdOnMergeReturnsError(t *testing.T) { +func TestDuplicateIdOnLoadReturnsErrorForJobsAndExperiments(t *testing.T) { + _, diags := Load("./testdata/duplicate_resource_names_in_root_job_and_experiment/databricks.yml") + assert.ErrorContains(t, diags.Error(), "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root_jobs_and_experiments/databricks.yml:10:7, experiment at ./testdata/duplicate_resource_names_in_root_jobs_and_experiments/databricks.yml:18:7)") +} + +func TestDuplicateIdOnMergeReturnsErrorForJobAndPipeline(t *testing.T) { root, diags := Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml") require.NoError(t, diags.Error()) @@ -43,7 +48,18 @@ func TestDuplicateIdOnMergeReturnsError(t *testing.T) { require.NoError(t, diags.Error()) err := root.Merge(other) - assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)") + assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml:10:7, job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml:10:7)") +} + +func TestDuplicateIdOnMergeReturnsErrorForJobAndJob(t *testing.T) { + root, diags := Load("./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml") + require.NoError(t, diags.Error()) + + other, diags := Load("./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml") + require.NoError(t, diags.Error()) + + err := root.Merge(other) + assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml:10:7, job at ./testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml:10:7)") } func TestInitializeVariables(t *testing.T) { diff --git a/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml b/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml new file mode 100644 index 00000000..a8160292 --- /dev/null +++ b/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: test + +workspace: + profile: test + +resources: + jobs: + foo: + name: job foo diff --git a/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml b/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml new file mode 100644 index 00000000..f644bc84 --- /dev/null +++ b/bundle/config/testdata/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml @@ -0,0 +1,4 @@ +resources: + jobs: + foo: + name: pipeline foo diff --git a/bundle/config/testdata/duplicate_resource_names_in_root_job_and_experiment/databricks.yml b/bundle/config/testdata/duplicate_resource_names_in_root_job_and_experiment/databricks.yml new file mode 100644 index 00000000..d286f104 --- /dev/null +++ b/bundle/config/testdata/duplicate_resource_names_in_root_job_and_experiment/databricks.yml @@ -0,0 +1,18 @@ +bundle: + name: test + +workspace: + profile: test + +resources: + jobs: + foo: + name: job foo + bar: + name: job bar + pipelines: + baz: + name: pipeline baz + experiments: + foo: + name: experiment foo diff --git a/bundle/config/testdata/duplicate_resource_names_in_root/databricks.yml b/bundle/config/testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml similarity index 69% rename from bundle/config/testdata/duplicate_resource_names_in_root/databricks.yml rename to bundle/config/testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml index 1e9aa10b..9189fe4e 100644 --- a/bundle/config/testdata/duplicate_resource_names_in_root/databricks.yml +++ b/bundle/config/testdata/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml @@ -8,6 +8,10 @@ resources: jobs: foo: name: job foo + bar: + name: job bar pipelines: foo: name: pipeline foo + baz: + name: pipeline baz