From 5c351d7ea33c3f7eb57e8ebb21ca58b13e9a9f5a Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Tue, 24 Sep 2024 14:39:28 +0200 Subject: [PATCH] more wip on making it work --- bundle/config/loader/process_include.go | 137 +++++----- bundle/config/loader/process_include_test.go | 249 ++++++++++--------- bundle/internal/bundletest/location.go | 3 +- 3 files changed, 214 insertions(+), 175 deletions(-) diff --git a/bundle/config/loader/process_include.go b/bundle/config/loader/process_include.go index 4ec9eba34..cc13d6900 100644 --- a/bundle/config/loader/process_include.go +++ b/bundle/config/loader/process_include.go @@ -9,7 +9,6 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" - "golang.org/x/exp/maps" ) // Steps: @@ -27,6 +26,7 @@ import ( // TODO: Talk in the PR about how this synergizes with the validate all unique // keys mutator. +// Should I add a new abstraction for dyn values here? var resourceTypes = []string{ "job", @@ -39,16 +39,29 @@ var resourceTypes = []string{ "schema", } -type resource struct { - typ string - l dyn.Location - p dyn.Path +func validateFileFormat(r *config.Root, filePath string) diag.Diagnostics { + for _, typ := range resourceTypes { + for _, ext := range []string{fmt.Sprintf(".%s.yml", typ), fmt.Sprintf(".%s.yaml", typ)} { + if strings.HasSuffix(filePath, ext) { + return validateSingleResourceDefined(r, ext, typ) + } + } + } + + return nil } -func gatherResources(r *config.Root) (map[string]resource, error) { - res := map[string]resource{} +func validateSingleResourceDefined(r *config.Root, ext, typ string) diag.Diagnostics { + type resource struct { + path dyn.Path + value dyn.Value + typ string + key string + } - // Gather all resources defined in the "resources" block. + resources := []resource{} + + // Gather all resources defined in the resources block. _, err := dyn.MapByPattern( r.Value(), dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), @@ -58,14 +71,11 @@ func gatherResources(r *config.Root) (map[string]resource, error) { // The type of the resource. Eg: "job" for jobs.my_job. typ := strings.TrimSuffix(p[1].Key(), "s") - // We don't care if duplicate keys are defined across resources. That'll - // cause an error that is caught by a separate mutator that validates - // unique keys across all resources. - res[k] = resource{typ: typ, l: v.Location(), p: p} + resources = append(resources, resource{path: p, value: v, typ: typ, key: k}) return v, nil }) if err != nil { - return nil, err + return diag.FromErr(err) } // Gather all resources defined in a target block. @@ -73,14 +83,65 @@ func gatherResources(r *config.Root) (map[string]resource, error) { r.Value(), dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey(), dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // The key for the resource. Eg: "my_job" for jobs.my_job. k := p[4].Key() + // The type of the resource. Eg: "job" for jobs.my_job. typ := strings.TrimSuffix(p[3].Key(), "s") - res[k] = resource{typ: typ, l: v.Location(), p: p} + resources = append(resources, resource{path: p, value: v, typ: typ, key: k}) return v, nil + }) + if err != nil { + return diag.FromErr(err) + } + + valid := true + seenKeys := map[string]struct{}{} + for _, rr := range resources { + if len(seenKeys) == 0 { + seenKeys[rr.key] = struct{}{} + continue + } + + if _, ok := seenKeys[rr.key]; !ok { + valid = false + break + } + + if rr.typ != typ { + valid = false + break + } + } + + // The file only contains one resource defined in its resources or targets block, + // and the resource is of the correct type. + if valid { + return nil + } + + msg := strings.Builder{} + msg.WriteString(fmt.Sprintf("We recommend only defining a single %s when a file has the %s extension.", typ, ext)) + msg.WriteString("The following resources are defined or configured in this file:\n") + for _, r := range resources { + msg.WriteString(fmt.Sprintf(" - %s (%s)\n", r.key, r.typ)) + } + + locations := []dyn.Location{} + paths := []dyn.Path{} + for _, rr := range resources { + locations = append(locations, rr.value.Locations()...) + paths = append(paths, rr.path) + } + + return diag.Diagnostics{ + { + Severity: diag.Info, + Summary: msg.String(), + Locations: locations, + Paths: paths, }, - ) - return res, err + } } type processInclude struct { @@ -106,48 +167,8 @@ func (m *processInclude) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos return diags } - for _, typ := range resourceTypes { - ext := fmt.Sprintf("%s.yml", typ) - - // File does not match this resource type. Check the next one. - if !strings.HasSuffix(m.relPath, ".yml") { - continue - } - - resources, err := gatherResources(this) - if err != nil { - return diag.FromErr(err) - } - - // file only has one resource defined, and the resource is of the correct - // type. Thus it matches the recommendation we have for extensions like - // .job.yml, .pipeline.yml, etc. - keys := maps.Keys(resources) - if len(resources) == 1 && resources[keys[0]].typ == typ { - continue - } - - msg := strings.Builder{} - msg.WriteString(fmt.Sprintf("We recommend only defining a single %s in a file with the extension %s.\n", typ, ext)) - msg.WriteString("The following resources are defined or configured in this file:\n") - for k, v := range resources { - msg.WriteString(fmt.Sprintf(" - %s (%s)\n", k, v.typ)) - } - - locations := []dyn.Location{} - paths := []dyn.Path{} - for _, r := range resources { - locations = append(locations, []dyn.Location{r.l}...) - paths = append(paths, []dyn.Path{r.p}...) - } - - diags = append(diags, diag.Diagnostic{ - Severity: diag.Info, - Summary: msg.String(), - Locations: locations, - Paths: paths, - }) - } + // Add any diagnostics associated with the file format. + diags = append(diags, validateFileFormat(this, m.relPath)...) err := b.Config.Merge(this) if err != nil { diff --git a/bundle/config/loader/process_include_test.go b/bundle/config/loader/process_include_test.go index e44e4f349..275b29fcc 100644 --- a/bundle/config/loader/process_include_test.go +++ b/bundle/config/loader/process_include_test.go @@ -10,8 +10,6 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" - "github.com/databricks/cli/libs/diag" - "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -57,123 +55,142 @@ func TestResourceNames(t *testing.T) { } } -func TestGatherResources(t *testing.T) { - // TODO: Add location tests? - tcases := []struct { - name string - resources config.Resources - targets map[string]*config.Target - filenames map[string]string - expected map[string]resource - }{ - { - name: "empty", - resources: config.Resources{}, - expected: map[string]resource{}, - }, - { - name: "one job", - resources: config.Resources{ - Jobs: map[string]*resources.Job{ - "job1": {}, - }, +func TestValidateFileFormat(t *testing.T) { + onlyJob := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {}, }, - // TODO CONTINUE: Setting file names for the resources defined here. - // and testing they are correctly aggregated. - expected: map[string]resource{ - "job1": { - typ: "job", - p: dyn.MustPathFromString("resources.jobs.job1"), + }, + Targets: map[string]*config.Target{ + "target1": { + Resources: &config.Resources{ + Jobs: map[string]*resources.Job{ + "job1": {}, + }, }, }, }, - // { - // name: "three jobs", - // resources: config.Resources{ - // Jobs: map[string]*resources.Job{ - // "job1": {}, - // "job2": {}, - // "job3": {}, - // }, - // }, - // expected: []resource{ - // {"job1", "job"}, - // {"job2", "job"}, - // {"job3", "job"}, - // }, - // }, - // { - // name: "only one experiment target", - // resources: config.Resources{}, - // targets: map[string]*config.Target{ - // "target1": { - // Resources: &config.Resources{ - // Experiments: map[string]*resources.MlflowExperiment{ - // "experiment1": {}, - // }, - // }, - // }, - // }, - // expected: []resource{ - // {"experiment1", "experiment"}, - // }, - // }, - // { - // name: "multiple resources", - // resources: config.Resources{ - // Jobs: map[string]*resources.Job{ - // "job1": {}, - // }, - // Pipelines: map[string]*resources.Pipeline{ - // "pipeline1": {}, - // "pipeline2": {}, - // }, - // Experiments: map[string]*resources.MlflowExperiment{ - // "experiment1": {}, - // }, - // }, - // targets: map[string]*config.Target{ - // "target1": { - // Resources: &config.Resources{ - // ModelServingEndpoints: map[string]*resources.ModelServingEndpoint{ - // "model_serving_endpoint1": {}, - // }, - // }, - // }, - // "target2": { - // Resources: &config.Resources{ - // RegisteredModels: map[string]*resources.RegisteredModel{ - // "registered_model1": {}, - // }, - // }, - // }, - // }, - // expected: []resource{ - // {"job1", "job"}, - // {"pipeline1", "pipeline"}, - // {"pipeline2", "pipeline"}, - // {"experiment1", "experiment"}, - // {"model_serving_endpoint1", "model_serving_endpoint"}, - // {"registered_model1", "registered_model"}, - // }, - // }, } - - for _, tc := range tcases { - t.Run(tc.name, func(t *testing.T) { - b := &bundle.Bundle{} - - bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - b.Config.Resources = tc.resources - b.Config.Targets = tc.targets - return nil - }) - - res, err := gatherResources(&b.Config) - require.NoError(t, err) - - assert.Equal(t, tc.expected, res) - }) - } - } + +// func TestGatherResources(t *testing.T) { +// // TODO: Add location tests? +// tcases := []struct { +// name string +// resources config.Resources +// targets map[string]*config.Target +// filenames map[string]string +// expected map[string]resource +// }{ +// { +// name: "empty", +// resources: config.Resources{}, +// expected: map[string]resource{}, +// }, +// { +// name: "one job", +// resources: config.Resources{ +// Jobs: map[string]*resources.Job{ +// "job1": {}, +// }, +// }, +// // TODO CONTINUE: Setting file names for the resources defined here. +// // and testing they are correctly aggregated. +// expected: map[string]resource{ +// "job1": { +// typ: "job", +// p: dyn.MustPathFromString("resources.jobs.job1"), +// }, +// }, +// }, +// { +// name: "three jobs", +// resources: config.Resources{ +// Jobs: map[string]*resources.Job{ +// "job1": {}, +// "job2": {}, +// "job3": {}, +// }, +// }, +// expected: []resource{ +// {"job1", "job"}, +// {"job2", "job"}, +// {"job3", "job"}, +// }, +// }, +// { +// name: "only one experiment target", +// resources: config.Resources{}, +// targets: map[string]*config.Target{ +// "target1": { +// Resources: &config.Resources{ +// Experiments: map[string]*resources.MlflowExperiment{ +// "experiment1": {}, +// }, +// }, +// }, +// }, +// expected: []resource{ +// {"experiment1", "experiment"}, +// }, +// }, +// { +// name: "multiple resources", +// resources: config.Resources{ +// Jobs: map[string]*resources.Job{ +// "job1": {}, +// }, +// Pipelines: map[string]*resources.Pipeline{ +// "pipeline1": {}, +// "pipeline2": {}, +// }, +// Experiments: map[string]*resources.MlflowExperiment{ +// "experiment1": {}, +// }, +// }, +// targets: map[string]*config.Target{ +// "target1": { +// Resources: &config.Resources{ +// ModelServingEndpoints: map[string]*resources.ModelServingEndpoint{ +// "model_serving_endpoint1": {}, +// }, +// }, +// }, +// "target2": { +// Resources: &config.Resources{ +// RegisteredModels: map[string]*resources.RegisteredModel{ +// "registered_model1": {}, +// }, +// }, +// }, +// }, +// expected: []resource{ +// {"job1", "job"}, +// {"pipeline1", "pipeline"}, +// {"pipeline2", "pipeline"}, +// {"experiment1", "experiment"}, +// {"model_serving_endpoint1", "model_serving_endpoint"}, +// {"registered_model1", "registered_model"}, +// }, +// }, +// } + +// for _, tc := range tcases { +// t.Run(tc.name, func(t *testing.T) { +// b := &bundle.Bundle{} + +// bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { +// b.Config.Resources = tc.resources +// b.Config.Targets = tc.targets +// return nil +// }) + +// res, err := gatherResources(&b.Config) +// require.NoError(t, err) + +// assert.Equal(t, tc.expected, res) +// }) +// } + +// } diff --git a/bundle/internal/bundletest/location.go b/bundle/internal/bundletest/location.go index 2ffd621bf..681262700 100644 --- a/bundle/internal/bundletest/location.go +++ b/bundle/internal/bundletest/location.go @@ -8,7 +8,8 @@ import ( // SetLocation sets the location of all values in the bundle to the given path. // This is useful for testing where we need to associate configuration // with the path it is loaded from. -func SetLocation(b *bundle.Bundle, prefix string, locations []dyn.Location) { +// TODO: Go patch the location. +func SetLocation(b *bundle.Bundle, prefix string, filePath string) { start := dyn.MustPathFromString(prefix) b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {