more wip on making it work

This commit is contained in:
Shreyas Goenka 2024-09-24 14:39:28 +02:00
parent 458dbfb04d
commit 5c351d7ea3
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
3 changed files with 214 additions and 175 deletions

View File

@ -9,7 +9,6 @@ import (
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"golang.org/x/exp/maps"
)
// Steps:
@ -27,6 +26,7 @@ import (
// TODO: Talk in the PR about how this synergizes with the validate all unique
// keys mutator.
// Should I add a new abstraction for dyn values here?
var resourceTypes = []string{
"job",
@ -39,16 +39,29 @@ var resourceTypes = []string{
"schema",
}
type resource struct {
typ string
l dyn.Location
p dyn.Path
func validateFileFormat(r *config.Root, filePath string) diag.Diagnostics {
for _, typ := range resourceTypes {
for _, ext := range []string{fmt.Sprintf(".%s.yml", typ), fmt.Sprintf(".%s.yaml", typ)} {
if strings.HasSuffix(filePath, ext) {
return validateSingleResourceDefined(r, ext, typ)
}
}
}
return nil
}
func gatherResources(r *config.Root) (map[string]resource, error) {
res := map[string]resource{}
func validateSingleResourceDefined(r *config.Root, ext, typ string) diag.Diagnostics {
type resource struct {
path dyn.Path
value dyn.Value
typ string
key string
}
// Gather all resources defined in the "resources" block.
resources := []resource{}
// Gather all resources defined in the resources block.
_, err := dyn.MapByPattern(
r.Value(),
dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
@ -58,14 +71,11 @@ func gatherResources(r *config.Root) (map[string]resource, error) {
// The type of the resource. Eg: "job" for jobs.my_job.
typ := strings.TrimSuffix(p[1].Key(), "s")
// We don't care if duplicate keys are defined across resources. That'll
// cause an error that is caught by a separate mutator that validates
// unique keys across all resources.
res[k] = resource{typ: typ, l: v.Location(), p: p}
resources = append(resources, resource{path: p, value: v, typ: typ, key: k})
return v, nil
})
if err != nil {
return nil, err
return diag.FromErr(err)
}
// Gather all resources defined in a target block.
@ -73,14 +83,65 @@ func gatherResources(r *config.Root) (map[string]resource, error) {
r.Value(),
dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey(), dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()),
func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// The key for the resource. Eg: "my_job" for jobs.my_job.
k := p[4].Key()
// The type of the resource. Eg: "job" for jobs.my_job.
typ := strings.TrimSuffix(p[3].Key(), "s")
res[k] = resource{typ: typ, l: v.Location(), p: p}
resources = append(resources, resource{path: p, value: v, typ: typ, key: k})
return v, nil
})
if err != nil {
return diag.FromErr(err)
}
valid := true
seenKeys := map[string]struct{}{}
for _, rr := range resources {
if len(seenKeys) == 0 {
seenKeys[rr.key] = struct{}{}
continue
}
if _, ok := seenKeys[rr.key]; !ok {
valid = false
break
}
if rr.typ != typ {
valid = false
break
}
}
// The file only contains one resource defined in its resources or targets block,
// and the resource is of the correct type.
if valid {
return nil
}
msg := strings.Builder{}
msg.WriteString(fmt.Sprintf("We recommend only defining a single %s when a file has the %s extension.", typ, ext))
msg.WriteString("The following resources are defined or configured in this file:\n")
for _, r := range resources {
msg.WriteString(fmt.Sprintf(" - %s (%s)\n", r.key, r.typ))
}
locations := []dyn.Location{}
paths := []dyn.Path{}
for _, rr := range resources {
locations = append(locations, rr.value.Locations()...)
paths = append(paths, rr.path)
}
return diag.Diagnostics{
{
Severity: diag.Info,
Summary: msg.String(),
Locations: locations,
Paths: paths,
},
)
return res, err
}
}
type processInclude struct {
@ -106,48 +167,8 @@ func (m *processInclude) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnos
return diags
}
for _, typ := range resourceTypes {
ext := fmt.Sprintf("%s.yml", typ)
// File does not match this resource type. Check the next one.
if !strings.HasSuffix(m.relPath, ".yml") {
continue
}
resources, err := gatherResources(this)
if err != nil {
return diag.FromErr(err)
}
// file only has one resource defined, and the resource is of the correct
// type. Thus it matches the recommendation we have for extensions like
// .job.yml, .pipeline.yml, etc.
keys := maps.Keys(resources)
if len(resources) == 1 && resources[keys[0]].typ == typ {
continue
}
msg := strings.Builder{}
msg.WriteString(fmt.Sprintf("We recommend only defining a single %s in a file with the extension %s.\n", typ, ext))
msg.WriteString("The following resources are defined or configured in this file:\n")
for k, v := range resources {
msg.WriteString(fmt.Sprintf(" - %s (%s)\n", k, v.typ))
}
locations := []dyn.Location{}
paths := []dyn.Path{}
for _, r := range resources {
locations = append(locations, []dyn.Location{r.l}...)
paths = append(paths, []dyn.Path{r.p}...)
}
diags = append(diags, diag.Diagnostic{
Severity: diag.Info,
Summary: msg.String(),
Locations: locations,
Paths: paths,
})
}
// Add any diagnostics associated with the file format.
diags = append(diags, validateFileFormat(this, m.relPath)...)
err := b.Config.Merge(this)
if err != nil {

View File

@ -10,8 +10,6 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -57,123 +55,142 @@ func TestResourceNames(t *testing.T) {
}
}
func TestGatherResources(t *testing.T) {
// TODO: Add location tests?
tcases := []struct {
name string
resources config.Resources
targets map[string]*config.Target
filenames map[string]string
expected map[string]resource
}{
{
name: "empty",
resources: config.Resources{},
expected: map[string]resource{},
},
{
name: "one job",
resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {},
},
func TestValidateFileFormat(t *testing.T) {
onlyJob := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job1": {},
},
// TODO CONTINUE: Setting file names for the resources defined here.
// and testing they are correctly aggregated.
expected: map[string]resource{
"job1": {
typ: "job",
p: dyn.MustPathFromString("resources.jobs.job1"),
},
Targets: map[string]*config.Target{
"target1": {
Resources: &config.Resources{
Jobs: map[string]*resources.Job{
"job1": {},
},
},
},
},
// {
// name: "three jobs",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// "job2": {},
// "job3": {},
// },
// },
// expected: []resource{
// {"job1", "job"},
// {"job2", "job"},
// {"job3", "job"},
// },
// },
// {
// name: "only one experiment target",
// resources: config.Resources{},
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// },
// },
// expected: []resource{
// {"experiment1", "experiment"},
// },
// },
// {
// name: "multiple resources",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// },
// Pipelines: map[string]*resources.Pipeline{
// "pipeline1": {},
// "pipeline2": {},
// },
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// ModelServingEndpoints: map[string]*resources.ModelServingEndpoint{
// "model_serving_endpoint1": {},
// },
// },
// },
// "target2": {
// Resources: &config.Resources{
// RegisteredModels: map[string]*resources.RegisteredModel{
// "registered_model1": {},
// },
// },
// },
// },
// expected: []resource{
// {"job1", "job"},
// {"pipeline1", "pipeline"},
// {"pipeline2", "pipeline"},
// {"experiment1", "experiment"},
// {"model_serving_endpoint1", "model_serving_endpoint"},
// {"registered_model1", "registered_model"},
// },
// },
}
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
b := &bundle.Bundle{}
bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
b.Config.Resources = tc.resources
b.Config.Targets = tc.targets
return nil
})
res, err := gatherResources(&b.Config)
require.NoError(t, err)
assert.Equal(t, tc.expected, res)
})
}
}
// func TestGatherResources(t *testing.T) {
// // TODO: Add location tests?
// tcases := []struct {
// name string
// resources config.Resources
// targets map[string]*config.Target
// filenames map[string]string
// expected map[string]resource
// }{
// {
// name: "empty",
// resources: config.Resources{},
// expected: map[string]resource{},
// },
// {
// name: "one job",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// },
// },
// // TODO CONTINUE: Setting file names for the resources defined here.
// // and testing they are correctly aggregated.
// expected: map[string]resource{
// "job1": {
// typ: "job",
// p: dyn.MustPathFromString("resources.jobs.job1"),
// },
// },
// },
// {
// name: "three jobs",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// "job2": {},
// "job3": {},
// },
// },
// expected: []resource{
// {"job1", "job"},
// {"job2", "job"},
// {"job3", "job"},
// },
// },
// {
// name: "only one experiment target",
// resources: config.Resources{},
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// },
// },
// expected: []resource{
// {"experiment1", "experiment"},
// },
// },
// {
// name: "multiple resources",
// resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {},
// },
// Pipelines: map[string]*resources.Pipeline{
// "pipeline1": {},
// "pipeline2": {},
// },
// Experiments: map[string]*resources.MlflowExperiment{
// "experiment1": {},
// },
// },
// targets: map[string]*config.Target{
// "target1": {
// Resources: &config.Resources{
// ModelServingEndpoints: map[string]*resources.ModelServingEndpoint{
// "model_serving_endpoint1": {},
// },
// },
// },
// "target2": {
// Resources: &config.Resources{
// RegisteredModels: map[string]*resources.RegisteredModel{
// "registered_model1": {},
// },
// },
// },
// },
// expected: []resource{
// {"job1", "job"},
// {"pipeline1", "pipeline"},
// {"pipeline2", "pipeline"},
// {"experiment1", "experiment"},
// {"model_serving_endpoint1", "model_serving_endpoint"},
// {"registered_model1", "registered_model"},
// },
// },
// }
// for _, tc := range tcases {
// t.Run(tc.name, func(t *testing.T) {
// b := &bundle.Bundle{}
// bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
// b.Config.Resources = tc.resources
// b.Config.Targets = tc.targets
// return nil
// })
// res, err := gatherResources(&b.Config)
// require.NoError(t, err)
// assert.Equal(t, tc.expected, res)
// })
// }
// }

View File

@ -8,7 +8,8 @@ import (
// SetLocation sets the location of all values in the bundle to the given path.
// This is useful for testing where we need to associate configuration
// with the path it is loaded from.
func SetLocation(b *bundle.Bundle, prefix string, locations []dyn.Location) {
// TODO: Go patch the location.
func SetLocation(b *bundle.Bundle, prefix string, filePath string) {
start := dyn.MustPathFromString(prefix)
b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) {
return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {