Add detection and info diagnostic for convention for `.<resource-type>.yml` files

Shreyas Goenka 2024-09-19 11:41:40 +02:00
parent aa51b63352
commit 667fe6bf38
2 changed files with 263 additions and 3 deletions
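For context, the convention this commit detects: a bundle configuration file named `<name>.<resource-type>.yml` (for example `my_etl.job.yml`) is recommended to define exactly one resource, of the matching type. A minimal sketch of the suffix check in isolation (hypothetical helper and file names, not this commit's API):

package main

import (
	"fmt"
	"strings"
)

// followsConvention reports whether relPath carries the ".<typ>.yml"
// extension for the given singular resource type.
func followsConvention(relPath, typ string) bool {
	return strings.HasSuffix(relPath, fmt.Sprintf(".%s.yml", typ))
}

func main() {
	fmt.Println(followsConvention("my_etl.job.yml", "job")) // true
	fmt.Println(followsConvention("resources.yml", "job"))  // false
}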


@@ -3,12 +3,84 @@ package loader
import (
"context"
"fmt"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"golang.org/x/exp/maps"
)
// Steps:
// 1. Return an info diag here if the convention is not followed.
// 2. Add a unit test for this mutator asserting the convention is enforced.
//    Also add unit tests for the dynamic extension computation.
// 3. Add INFO rendering to the validate command.
// 4. Add a unit test that the INFO rendering is correct.
// 5. Manually test the info diag.
// TODO: Since we are skipping environments here, we should return a warning
// if environments is used (is that already the case?). And explain in the PR
// that we are choosing to not gather resources from environments.
// TODO: Talk in the PR about how this synergizes with the "validate all
// unique keys" mutator.
var resourceTypes = []string{
"job",
"pipeline",
"model",
"experiment",
"model_serving_endpoint",
"registered_model",
"quality_monitor",
"schema",
}
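// NB: this list holds the singular forms of the JSON tags on
// config.Resources; TestResourceNames in the accompanying test file keeps
// it in sync with the struct.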
type resource struct {
// typ is the singular resource type, e.g. "job" for jobs.my_job.
typ string
// l is the location (file, line, column) where the resource is defined.
l dyn.Location
// p is the dyn path to the resource, e.g. "resources.jobs.my_job".
p dyn.Path
}
func gatherResources(r *config.Root) (map[string]resource, error) {
res := map[string]resource{}
// Gather all resources defined in the "resources" block.
_, err := dyn.MapByPattern(
r.Value(),
dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// The key for the resource. Eg: "my_job" for jobs.my_job.
k := p[2].Key()
// The type of the resource. Eg: "job" for jobs.my_job.
typ := strings.TrimSuffix(p[1].Key(), "s")
// We don't care if duplicate keys are defined across resources. That'll
// cause an error that is caught by a separate mutator that validates
// unique keys across all resources.
res[k] = resource{typ: typ, l: v.Location(), p: p}
return v, nil
})
if err != nil {
return nil, err
}
// Gather all resources defined in a target block.
_, err = dyn.MapByPattern(
r.Value(),
dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey(), dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
k := p[4].Key()
typ := strings.TrimSuffix(p[3].Key(), "s")
res[k] = resource{typ: typ, l: v.Location(), p: p}
return v, nil
},
)
return res, err
}
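// For illustration (not part of this commit): if a config defines both
// resources.jobs.my_job and targets.dev.resources.jobs.my_job, the map
// returned above holds a single entry for "my_job", because the
// target-level match is visited second and overwrites the top-level one:
//
//	res["my_job"] = resource{typ: "job", p: dyn.MustPathFromString("targets.dev.resources.jobs.my_job")}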
type processInclude struct {
fullPath string
relPath string
@@ -31,6 +103,50 @@ func (m *processInclude) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics {
if diags.HasError() {
return diags
}
for _, typ := range resourceTypes {
ext := fmt.Sprintf(".%s.yml", typ)
// File does not have this resource type's extension. Check the next one.
if !strings.HasSuffix(m.relPath, ext) {
continue
}
resources, err := gatherResources(this)
if err != nil {
return diag.FromErr(err)
}
// The file defines exactly one resource and it is of the matching type.
// Thus it follows the recommendation we have for extensions like
// .job.yml, .pipeline.yml, etc.
keys := maps.Keys(resources)
if len(resources) == 1 && resources[keys[0]].typ == typ {
continue
}
msg := strings.Builder{}
msg.WriteString(fmt.Sprintf("We recommend only defining a single %s in a file with the extension %s.\n", typ, ext))
msg.WriteString("The following resources are defined or configured in this file:\n")
for k, v := range resources {
msg.WriteString(fmt.Sprintf(" - %s (%s)\n", k, v.typ))
}
locations := []dyn.Location{}
paths := []dyn.Path{}
for _, r := range resources {
locations = append(locations, r.l)
paths = append(paths, r.p)
}
diags = append(diags, diag.Diagnostic{
Severity: diag.Info,
Summary: msg.String(),
Locations: locations,
Paths: paths,
})
}
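// As a sketch: for a hypothetical file "all.job.yml" defining a job and a
// pipeline, the summary assembled above renders roughly as:
//
//	We recommend only defining a single job in a file with the extension .job.yml.
//	The following resources are defined or configured in this file:
//	  - my_job (job)
//	  - my_pipeline (pipeline)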
err := b.Config.Merge(this)
if err != nil {
diags = diags.Extend(diag.FromErr(err))


@@ -1,13 +1,17 @@
-package loader_test
+package loader
import (
"context"
"path/filepath"
"reflect"
"strings"
"testing"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/loader"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -22,7 +26,7 @@ func TestProcessInclude(t *testing.T) {
},
}
-m := loader.ProcessInclude(filepath.Join(b.RootPath, "host.yml"), "host.yml")
+m := ProcessInclude(filepath.Join(b.RootPath, "host.yml"), "host.yml")
assert.Equal(t, "ProcessInclude(host.yml)", m.Name())
// Assert the host value prior to applying the mutator
@@ -33,3 +37,143 @@ func TestProcessInclude(t *testing.T) {
require.NoError(t, diags.Error())
assert.Equal(t, "bar", b.Config.Workspace.Host)
}
func TestResourceNames(t *testing.T) {
names := []string{}
typ := reflect.TypeOf(config.Resources{})
for i := 0; i < typ.NumField(); i++ {
field := typ.Field(i)
jsonTags := strings.Split(field.Tag.Get("json"), ",")
singularName := strings.TrimSuffix(jsonTags[0], "s")
names = append(names, singularName)
}
// Assert the contents of the two lists are equal. Please add the singular
// name of your resource to the resourceTypes global if you are adding a new
// resource.
assert.Equal(t, len(resourceTypes), len(names))
for _, name := range names {
assert.Contains(t, resourceTypes, name)
}
}
func TestGatherResources(t *testing.T) {
// TODO: Add location tests?
tcases := []struct {
name string
resources config.Resources
targets map[string]*config.Target
filenames map[string]string
expected map[string]resource
}{
{
name: "empty",
resources: config.Resources{},
expected: map[string]resource{},
},
{
name: "one job",
resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {},
},
},
// TODO (continue): set file names for the resources defined here and test
// that they are aggregated correctly.
expected: map[string]resource{
"job1": {
typ: "job",
p: dyn.MustPathFromString("resources.jobs.job1"),
},
},
},
// {
// name: "three jobs",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// "job2": {},
// "job3": {},
// },
// },
// expected: map[string]resource{
// "job1": {typ: "job", p: dyn.MustPathFromString("resources.jobs.job1")},
// "job2": {typ: "job", p: dyn.MustPathFromString("resources.jobs.job2")},
// "job3": {typ: "job", p: dyn.MustPathFromString("resources.jobs.job3")},
// },
// },
// {
// name: "only one experiment target",
// resources: config.Resources{},
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// },
// },
// expected: map[string]resource{
// "experiment1": {typ: "experiment", p: dyn.MustPathFromString("targets.target1.resources.experiments.experiment1")},
// },
// },
// {
// name: "multiple resources",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// },
// Pipelines: map[string]*resources.Pipeline{
// "pipeline1": {},
// "pipeline2": {},
// },
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// ModelServingEndpoints: map[string]*resources.ModelServingEndpoint{
// "model_serving_endpoint1": {},
// },
// },
// },
// "target2": {
// Resources: &config.Resources{
// RegisteredModels: map[string]*resources.RegisteredModel{
// "registered_model1": {},
// },
// },
// },
// },
// expected: map[string]resource{
// "job1": {typ: "job", p: dyn.MustPathFromString("resources.jobs.job1")},
// "pipeline1": {typ: "pipeline", p: dyn.MustPathFromString("resources.pipelines.pipeline1")},
// "pipeline2": {typ: "pipeline", p: dyn.MustPathFromString("resources.pipelines.pipeline2")},
// "experiment1": {typ: "experiment", p: dyn.MustPathFromString("resources.experiments.experiment1")},
// "model_serving_endpoint1": {typ: "model_serving_endpoint", p: dyn.MustPathFromString("targets.target1.resources.model_serving_endpoints.model_serving_endpoint1")},
// "registered_model1": {typ: "registered_model", p: dyn.MustPathFromString("targets.target2.resources.registered_models.registered_model1")},
// },
// },
}
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
b := &bundle.Bundle{}
bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
b.Config.Resources = tc.resources
b.Config.Targets = tc.targets
return nil
})
res, err := gatherResources(&b.Config)
require.NoError(t, err)
assert.Equal(t, tc.expected, res)
})
}
}
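The new tests can be exercised with standard Go tooling; the package path follows from the imports above:

go test ./bundle/config/loader/ -run 'TestResourceNames|TestGatherResources'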