wip adding tracking for YAML locations

This commit is contained in:
Shreyas Goenka 2024-05-27 14:32:01 +02:00
parent 6302e58965
commit cd5cf70701
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
14 changed files with 214 additions and 49 deletions

View File

@ -32,7 +32,7 @@ func (m *environmentsToTargets) Apply(ctx context.Context, b *bundle.Bundle) dia
targets := v.Get("targets") targets := v.Get("targets")
// Return an error if both "environments" and "targets" are set. // Return an error if both "environments" and "targets" are set.
if environments != dyn.NilValue && targets != dyn.NilValue { if !environments.IsNil() && !targets.IsNil() {
return dyn.NilValue, fmt.Errorf( return dyn.NilValue, fmt.Errorf(
"both 'environments' and 'targets' are specified; only 'targets' should be used: %s", "both 'environments' and 'targets' are specified; only 'targets' should be used: %s",
environments.Location().String(), environments.Location().String(),
@ -40,7 +40,7 @@ func (m *environmentsToTargets) Apply(ctx context.Context, b *bundle.Bundle) dia
} }
// Rewrite "environments" to "targets". // Rewrite "environments" to "targets".
if environments != dyn.NilValue && targets == dyn.NilValue { if !environments.IsNil() && !targets.IsNil() {
nv, err := dyn.Set(v, "targets", environments) nv, err := dyn.Set(v, "targets", environments)
if err != nil { if err != nil {
return dyn.NilValue, err return dyn.NilValue, err

View File

@ -32,7 +32,7 @@ func (m *mergeJobClusters) jobClusterKey(v dyn.Value) string {
func (m *mergeJobClusters) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { func (m *mergeJobClusters) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
if v == dyn.NilValue { if v.IsNil() {
return v, nil return v, nil
} }

View File

@ -32,7 +32,7 @@ func (m *mergeJobTasks) taskKeyString(v dyn.Value) string {
func (m *mergeJobTasks) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { func (m *mergeJobTasks) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
if v == dyn.NilValue { if v.IsNil() {
return v, nil return v, nil
} }

View File

@ -35,7 +35,7 @@ func (m *mergePipelineClusters) clusterLabel(v dyn.Value) string {
func (m *mergePipelineClusters) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { func (m *mergePipelineClusters) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
if v == dyn.NilValue { if v.IsNil() {
return v, nil return v, nil
} }

View File

@ -124,6 +124,32 @@ func (r *Root) updateWithDynamicValue(nv dyn.Value) error {
r.depth = depth r.depth = depth
}() }()
// // CONTINUE: This does not work. This continues to use the value from the
// // previous loaded databricks.yml. Why?....
// //
// // Annotate original locations of values as metadata in the new value,
// // if the value has already been set once in the configuration tree.
// // TODO: Move this to a more appropriate location. closer to the loading of YAML.
// nv, err := dyn.Walk(nv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// ov, err := dyn.GetByPath(r.value, p)
// // The value has not been set in the original configuration tree.
// if err != nil {
// return v, nil
// }
// s := p.String()
// if s == "resources.jobs.foo" {
// fmt.Println("resources.jobs.foo")
// }
// // The new value will override the old one. Include the old location as
// // additional metadata.
// return v.WithYamlLocation(ov.Location()), nil
// })
// if err != nil {
// return err
// }
// Convert normalized configuration tree to typed configuration. // Convert normalized configuration tree to typed configuration.
err := convert.ToTyped(r, nv) err := convert.ToTyped(r, nv)
if err != nil { if err != nil {
@ -318,7 +344,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// Merge `run_as`. This field must be overwritten if set, not merged. // Merge `run_as`. This field must be overwritten if set, not merged.
if v := target.Get("run_as"); v != dyn.NilValue { if v := target.Get("run_as"); !v.IsNil() {
root, err = dyn.Set(root, "run_as", v) root, err = dyn.Set(root, "run_as", v)
if err != nil { if err != nil {
return err return err
@ -326,7 +352,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// Below, we're setting fields on the bundle key, so make sure it exists. // Below, we're setting fields on the bundle key, so make sure it exists.
if root.Get("bundle") == dyn.NilValue { if root.Get("bundle").IsNil() {
root, err = dyn.Set(root, "bundle", dyn.NewValue(map[string]dyn.Value{}, dyn.Location{})) root, err = dyn.Set(root, "bundle", dyn.NewValue(map[string]dyn.Value{}, dyn.Location{}))
if err != nil { if err != nil {
return err return err
@ -334,7 +360,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// Merge `mode`. This field must be overwritten if set, not merged. // Merge `mode`. This field must be overwritten if set, not merged.
if v := target.Get("mode"); v != dyn.NilValue { if v := target.Get("mode"); !v.IsNil() {
root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("mode")), v) root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("mode")), v)
if err != nil { if err != nil {
return err return err
@ -342,7 +368,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// Merge `compute_id`. This field must be overwritten if set, not merged. // Merge `compute_id`. This field must be overwritten if set, not merged.
if v := target.Get("compute_id"); v != dyn.NilValue { if v := target.Get("compute_id"); !v.IsNil() {
root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("compute_id")), v) root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("compute_id")), v)
if err != nil { if err != nil {
return err return err
@ -350,7 +376,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// Merge `git`. // Merge `git`.
if v := target.Get("git"); v != dyn.NilValue { if v := target.Get("git"); !v.IsNil() {
ref, err := dyn.GetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git"))) ref, err := dyn.GetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("git")))
if err != nil { if err != nil {
ref = dyn.NewValue(map[string]dyn.Value{}, dyn.Location{}) ref = dyn.NewValue(map[string]dyn.Value{}, dyn.Location{})
@ -363,7 +389,7 @@ func (r *Root) MergeTargetOverrides(name string) error {
} }
// If the branch was overridden, we need to clear the inferred flag. // If the branch was overridden, we need to clear the inferred flag.
if branch := v.Get("branch"); branch != dyn.NilValue { if branch := v.Get("branch"); !branch.IsNil() {
out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.NewValue(false, dyn.Location{})) out, err = dyn.SetByPath(out, dyn.NewPath(dyn.Key("inferred")), dyn.NewValue(false, dyn.Location{}))
if err != nil { if err != nil {
return err return err
@ -391,7 +417,7 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
// For each target, rewrite the variables block. // For each target, rewrite the variables block.
return dyn.Map(v, "targets", dyn.Foreach(func(_ dyn.Path, target dyn.Value) (dyn.Value, error) { return dyn.Map(v, "targets", dyn.Foreach(func(_ dyn.Path, target dyn.Value) (dyn.Value, error) {
// Confirm it has a variables block. // Confirm it has a variables block.
if target.Get("variables") == dyn.NilValue { if target.Get("variables").IsNil() {
return target, nil return target, nil
} }

View File

@ -3,6 +3,8 @@ package validate
import ( import (
"context" "context"
"fmt" "fmt"
"slices"
"strings"
"github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/diag"
@ -21,12 +23,36 @@ func (m *uniqueResourceKeys) Name() string {
return "validate:unique_resource_keys" return "validate:unique_resource_keys"
} }
// Validate a single resource is only defined at a single location. We do not allow
// a single resource to be defined at multiple files.
// func validateSingleYamlFile(rv dyn.Value, rp dyn.Path) error {
// rrv, err := dyn.GetByPath(rv, rp)
// if err != nil {
// return err
// }
// _, err = dyn.Walk(rrv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
// locations := v.YamlLocations()
// if len(locations) <= 1 {
// return v, nil
// }
// return v, fmt.Errorf("resource %s has been defined at multiple locations: (%s)", rp.String(), strings.Join(ls, ", "))
// })
// return err
// }
// TODO: Ensure all duplicate key sites are returned to the user in the diagnostics. // TODO: Ensure all duplicate key sites are returned to the user in the diagnostics.
// TODO: Add unit tests for this mutator. // TODO: Add unit tests for this mutator.
// TODO: l is not effective location because of additive semantics. Clarify this in the
// documentation.
func (m *uniqueResourceKeys) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { func (m *uniqueResourceKeys) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics {
diags := diag.Diagnostics{} diags := diag.Diagnostics{}
paths := make(map[string]dyn.Path) // Map of resource identifiers to their paths.
// Example entry: my_job -> jobs.my_job
seenPaths := make(map[string]dyn.Path)
rv := rb.Config().ReadOnlyValue().Get("resources") rv := rb.Config().ReadOnlyValue().Get("resources")
// Walk the resources tree and accumulate the resource identifiers. // Walk the resources tree and accumulate the resource identifiers.
@ -42,9 +68,9 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, rb bundle.ReadOnlyBundle
// If the resource identifier already exists in the map, return an error. // If the resource identifier already exists in the map, return an error.
k := p[1].Key() k := p[1].Key()
if _, ok := paths[k]; ok { if _, ok := seenPaths[k]; ok {
// Value of the resource that's already been seen // Value of the resource that's already been seen
ov, _ := dyn.GetByPath(rv, paths[k]) ov, _ := dyn.GetByPath(rv, seenPaths[k])
// Value of the newly encountered resource with a duplicate identifier. // Value of the newly encountered resource with a duplicate identifier.
nv, _ := dyn.GetByPath(rv, p) nv, _ := dyn.GetByPath(rv, p)
@ -52,13 +78,43 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, rb bundle.ReadOnlyBundle
// Error, encountered a duplicate resource identifier. // Error, encountered a duplicate resource identifier.
diags = append(diags, diag.Diagnostic{ diags = append(diags, diag.Diagnostic{
Severity: diag.Error, Severity: diag.Error,
Summary: fmt.Sprintf("multiple resources named %s (%s at %s, %s at %s)", k, paths[k].String(), ov.Location(), p.String(), nv.Location()), Summary: fmt.Sprintf("multiple resources named %s (%s at %s, %s at %s)", k, seenPaths[k].String(), ov.Location(), p.String(), nv.Location()),
Location: nv.Location(), Location: nv.Location(),
}) })
} }
s := p.String()
if s == "jobs.foo" {
fmt.Println("jobs.foo")
}
yamlLocations := v.YamlLocations()
if len(yamlLocations) > 1 {
// Sort locations to make the error message deterministic.
ls := make([]string, 0, len(yamlLocations))
for _, l := range yamlLocations {
ls = append(ls, l.String())
}
slices.Sort(ls)
diags = append(diags, diag.Diagnostic{
Severity: diag.Error,
Summary: fmt.Sprintf("resource %s has been defined at multiple locations: (%s)", p.String(), strings.Join(ls, ", ")),
Location: v.Location(),
})
}
// err := validateSingleYamlFile(rv, p)
// if err != nil {
// diags = append(diags, diag.Diagnostic{
// Severity: diag.Error,
// Summary: err.Error(),
// Location: v.Location(),
// })
// }
// Accumulate the resource identifier and its path. // Accumulate the resource identifier and its path.
paths[k] = p seenPaths[k] = p
return v, nil return v, nil
}) })

View File

@ -4,6 +4,9 @@ bundle:
workspace: workspace:
profile: test profile: test
include:
- ./resources.yml
resources: resources:
jobs: jobs:
foo: foo:

View File

@ -14,26 +14,26 @@ func TestValidateUniqueResourceIdentifiers(t *testing.T) {
name string name string
errorMsg string errorMsg string
}{ }{
{
name: "duplicate_resource_names_in_root_job_and_pipeline",
errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:10:7, pipelines.foo at validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:13:7)",
},
{
name: "duplicate_resource_names_in_root_job_and_experiment",
errorMsg: "multiple resources named foo (experiments.foo at validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml:18:7, jobs.foo at validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml:10:7)",
},
{
name: "duplicate_resource_name_in_subconfiguration",
errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_name_in_subconfiguration/databricks.yml:13:7, pipelines.foo at validate/duplicate_resource_name_in_subconfiguration/resources.yml:4:7)",
},
// { // {
// name: "duplicate_resource_name_in_subconfiguration_job_and_job", // name: "duplicate_resource_names_in_root_job_and_pipeline",
// errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml:10:7, jobs.foo at validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml:4:7)", // errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:10:7, pipelines.foo at validate/duplicate_resource_names_in_root_job_and_pipeline/databricks.yml:13:7)",
// },
// {
// name: "duplicate_resource_names_in_root_job_and_experiment",
// errorMsg: "multiple resources named foo (experiments.foo at validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml:18:7, jobs.foo at validate/duplicate_resource_names_in_root_job_and_experiment/databricks.yml:10:7)",
// },
// {
// name: "duplicate_resource_name_in_subconfiguration",
// errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_name_in_subconfiguration/databricks.yml:13:7, pipelines.foo at validate/duplicate_resource_name_in_subconfiguration/resources.yml:4:7)",
// }, // },
{ {
name: "duplicate_resource_names_in_different_subconfiguations", name: "duplicate_resource_name_in_subconfiguration_job_and_job",
errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_names_in_different_subconfiguations/resources1.yml:4:7, pipelines.foo at validate/duplicate_resource_names_in_different_subconfiguations/resources2.yml:4:7)", errorMsg: "resource jobs.foo has been defined at multiple locations: (validate/duplicate_resource_name_in_subconfiguration_job_and_job/databricks.yml:13:13, validate/duplicate_resource_name_in_subconfiguration_job_and_job/resources.yml:4:13)",
}, },
// {
// name: "duplicate_resource_names_in_different_subconfiguations",
// errorMsg: "multiple resources named foo (jobs.foo at validate/duplicate_resource_names_in_different_subconfiguations/resources1.yml:4:7, pipelines.foo at validate/duplicate_resource_names_in_different_subconfiguations/resources2.yml:4:7)",
// },
} }
for _, tc := range tcases { for _, tc := range tcases {

View File

@ -91,7 +91,7 @@ func fromTypedStruct(src reflect.Value, ref dyn.Value) (dyn.Value, error) {
return dyn.InvalidValue, err return dyn.InvalidValue, err
} }
if nv != dyn.NilValue { if !nv.IsNil() {
out.Set(refk, nv) out.Set(refk, nv)
} }
} }

View File

@ -16,7 +16,7 @@ func ToTyped(dst any, src dyn.Value) error {
for dstv.Kind() == reflect.Pointer { for dstv.Kind() == reflect.Pointer {
// If the source value is nil and the destination is a settable pointer, // If the source value is nil and the destination is a settable pointer,
// set the destination to nil. Also see `end_to_end_test.go`. // set the destination to nil. Also see `end_to_end_test.go`.
if dstv.CanSet() && src == dyn.NilValue { if dstv.CanSet() && src.IsNil() {
dstv.SetZero() dstv.SetZero()
return nil return nil
} }

View File

@ -11,6 +11,10 @@ type Location struct {
Column int Column int
} }
// nilLocation is a convenient constant for a nil location.
// TODO: Remove this constant and rely on the file path in the location?
var nilLocation = Location{}
func (l Location) String() string { func (l Location) String() string {
return fmt.Sprintf("%s:%d:%d", l.File, l.Line, l.Column) return fmt.Sprintf("%s:%d:%d", l.File, l.Line, l.Column)
} }

View File

@ -22,7 +22,7 @@ func merge(a, b dyn.Value) (dyn.Value, error) {
// If a is nil, return b. // If a is nil, return b.
if ak == dyn.KindNil { if ak == dyn.KindNil {
return b, nil return b.WithYamlLocation(a.Location()), nil
} }
// If b is nil, return a. // If b is nil, return a.
@ -75,8 +75,9 @@ func mergeMap(a, b dyn.Value) (dyn.Value, error) {
} }
} }
// Preserve the location of the first value. // Preserve the location of the first value. Keep track of the location of the second value
return dyn.NewValue(out, a.Location()), nil // as metadata.
return dyn.NewValue(out, a.Location()).WithYamlLocation(b.Location()), nil
} }
func mergeSequence(a, b dyn.Value) (dyn.Value, error) { func mergeSequence(a, b dyn.Value) (dyn.Value, error) {
@ -88,11 +89,13 @@ func mergeSequence(a, b dyn.Value) (dyn.Value, error) {
copy(out[:], as) copy(out[:], as)
copy(out[len(as):], bs) copy(out[len(as):], bs)
// Preserve the location of the first value. // Preserve the location of the first value. Keep track of the location of the second value
return dyn.NewValue(out, a.Location()), nil // as metadata.
return dyn.NewValue(out, a.Location()).WithYamlLocation(b.Location()), nil
} }
func mergePrimitive(a, b dyn.Value) (dyn.Value, error) { func mergePrimitive(a, b dyn.Value) (dyn.Value, error) {
// Merging primitive values means using the incoming value. // Merging primitive values means using the incoming value. Keep track of the location of the
return b, nil // first value as metadata.
return b.WithYamlLocation(a.Location()), nil
} }

View File

@ -41,6 +41,32 @@ func TestMergeMaps(t *testing.T) {
} }
} }
func TestFoo(t *testing.T) {
v1 := dyn.V(map[string]dyn.Value{
"foo": dyn.V("bar"),
"bar": dyn.V("baz").WithLocation(dyn.Location{File: "abc"}),
})
v2 := dyn.V(map[string]dyn.Value{
"bar": dyn.V("qux").WithLocation(dyn.Location{File: "def"}),
"qux": dyn.V("foo"),
})
// Merge v2 into v1.
{
out, err := Merge(v1, v2)
assert.NoError(t, err)
assert.Equal(t, map[string]any{
"foo": "bar",
"bar": "qux",
"qux": "foo",
}, out.AsAny())
vv, _ := out.MustMap().GetPairByString("bar")
assert.Equal(t, []dyn.Location{}, vv.Value.YamlLocations())
}
}
func TestMergeMapsNil(t *testing.T) { func TestMergeMapsNil(t *testing.T) {
v := dyn.V(map[string]dyn.Value{ v := dyn.V(map[string]dyn.Value{
"foo": dyn.V("bar"), "foo": dyn.V("bar"),

View File

@ -2,14 +2,23 @@ package dyn
import ( import (
"fmt" "fmt"
"slices"
) )
type Value struct { type Value struct {
v any v any
k Kind k Kind
// Effective location where this value was defined and loaded from.
l Location l Location
// All YAML locations where this value has been defined. When merging configurations,
// values in a map may override values in another map. This field is used to track
// all YAML locations where a value was defined, even if the values at these locations
// were overridden by other values.
yamlLocations []Location
// Whether or not this value is an anchor. // Whether or not this value is an anchor.
// If this node doesn't map to a type, we don't need to warn about it. // If this node doesn't map to a type, we don't need to warn about it.
anchor bool anchor bool
@ -25,6 +34,10 @@ var NilValue = Value{
k: KindNil, k: KindNil,
} }
func (v Value) IsNil() bool {
return v.k == KindNil && v.v == nil
}
// V constructs a new Value with the given value. // V constructs a new Value with the given value.
func V(v any) Value { func V(v any) Value {
return NewValue(v, Location{}) return NewValue(v, Location{})
@ -37,22 +50,50 @@ func NewValue(v any, loc Location) Value {
v = newMappingFromGoMap(vin) v = newMappingFromGoMap(vin)
} }
yamlLocations := make([]Location, 0)
if loc != nilLocation {
yamlLocations = append(yamlLocations, loc)
}
return Value{ return Value{
v: v, v: v,
k: kindOf(v), k: kindOf(v),
l: loc, l: loc,
yamlLocations: yamlLocations,
} }
} }
// WithLocation returns a new Value with its location set to the given value. // WithLocation returns a new Value with its location set to the given value.
func (v Value) WithLocation(loc Location) Value { func (v Value) WithLocation(loc Location) Value {
if loc != nilLocation {
v.yamlLocations = append(v.yamlLocations, loc)
}
return Value{ return Value{
v: v.v, v: v.v,
k: v.k, k: v.k,
l: loc, l: loc,
yamlLocations: v.yamlLocations,
} }
} }
// WithYamlLocation returns a new Value with the given location added to its
// list of tracked YAML locations.
// This function is idempotent
func (v Value) WithYamlLocation(loc Location) Value {
// Location is already being tracked
if slices.Contains(v.yamlLocations, loc) {
return v
}
// We don't track empty locations.
if loc == nilLocation {
return v
}
v.yamlLocations = append(v.yamlLocations, loc)
return v
}
func (v Value) Kind() Kind { func (v Value) Kind() Kind {
return v.k return v.k
} }
@ -65,6 +106,12 @@ func (v Value) Location() Location {
return v.l return v.l
} }
// All YAML locations where this value has been defined. Is empty if the value
// was never defined in a YAML file.
func (v Value) YamlLocations() []Location {
return v.yamlLocations
}
func (v Value) IsValid() bool { func (v Value) IsValid() bool {
return v.k != KindInvalid return v.k != KindInvalid
} }