From 14abcb3ad7268ea9b5c6498a5d7cd5494eed3cf2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 24 Jan 2024 19:49:06 +0100 Subject: [PATCH] Add `dynvar` package for variable resolution with a `dyn.Value` tree (#1143) ## Changes This is the `dyn` counterpart to the `bundle/config/interpolation` package. It relies on the paths in `${foo.bar}` being valid `dyn.Path` instances. It leverages `dyn.Walk` to get a complete picture of all variable references and uses `dyn.Get` to retrieve values pointed to by variable references. Depends on #1142. ## Tests Unit test coverage. I tried to mirror the tests from `bundle/config/interpolation` and added new ones where applicable (for example to test type retention of referenced values). --- libs/dyn/dynvar/lookup.go | 21 ++++ libs/dyn/dynvar/lookup_test.go | 27 +++++ libs/dyn/dynvar/ref.go | 69 +++++++++++ libs/dyn/dynvar/ref_test.go | 46 +++++++ libs/dyn/dynvar/resolve.go | 206 ++++++++++++++++++++++++++++++++ libs/dyn/dynvar/resolve_test.go | 184 ++++++++++++++++++++++++++++ 6 files changed, 553 insertions(+) create mode 100644 libs/dyn/dynvar/lookup.go create mode 100644 libs/dyn/dynvar/lookup_test.go create mode 100644 libs/dyn/dynvar/ref.go create mode 100644 libs/dyn/dynvar/ref_test.go create mode 100644 libs/dyn/dynvar/resolve.go create mode 100644 libs/dyn/dynvar/resolve_test.go diff --git a/libs/dyn/dynvar/lookup.go b/libs/dyn/dynvar/lookup.go new file mode 100644 index 00000000..2bc08f47 --- /dev/null +++ b/libs/dyn/dynvar/lookup.go @@ -0,0 +1,21 @@ +package dynvar + +import ( + "errors" + + "github.com/databricks/cli/libs/dyn" +) + +// Lookup is the type of lookup functions that can be used with [Resolve]. +type Lookup func(path dyn.Path) (dyn.Value, error) + +// ErrSkipResolution is returned by a lookup function to indicate that the +// resolution of a variable reference should be skipped. +var ErrSkipResolution = errors.New("skip resolution") + +// DefaultLookup is the default lookup function used by [Resolve]. +func DefaultLookup(in dyn.Value) Lookup { + return func(path dyn.Path) (dyn.Value, error) { + return dyn.GetByPath(in, path) + } +} diff --git a/libs/dyn/dynvar/lookup_test.go b/libs/dyn/dynvar/lookup_test.go new file mode 100644 index 00000000..2341d720 --- /dev/null +++ b/libs/dyn/dynvar/lookup_test.go @@ -0,0 +1,27 @@ +package dynvar_test + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" + "github.com/stretchr/testify/assert" +) + +func TestDefaultLookup(t *testing.T) { + lookup := dynvar.DefaultLookup(dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("b"), + })) + + v1, err := lookup(dyn.NewPath(dyn.Key("a"))) + assert.NoError(t, err) + assert.Equal(t, dyn.V("a"), v1) + + v2, err := lookup(dyn.NewPath(dyn.Key("b"))) + assert.NoError(t, err) + assert.Equal(t, dyn.V("b"), v2) + + _, err = lookup(dyn.NewPath(dyn.Key("c"))) + assert.True(t, dyn.IsNoSuchKeyError(err)) +} diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go new file mode 100644 index 00000000..e4616c52 --- /dev/null +++ b/libs/dyn/dynvar/ref.go @@ -0,0 +1,69 @@ +package dynvar + +import ( + "regexp" + + "github.com/databricks/cli/libs/dyn" +) + +var re = regexp.MustCompile(`\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}`) + +// ref represents a variable reference. +// It is a string [dyn.Value] contained in a larger [dyn.Value]. +// Its path within the containing [dyn.Value] is also stored. +type ref struct { + // Original value. + value dyn.Value + + // String value in the original [dyn.Value]. + str string + + // Matches of the variable reference in the string. + matches [][]string +} + +// newRef returns a new ref if the given [dyn.Value] contains a string +// with one or more variable references. It returns false if the given +// [dyn.Value] does not contain variable references. +// +// Examples of a valid variable references: +// - "${a.b}" +// - "${a.b.c}" +// - "${a} ${b} ${c}" +func newRef(v dyn.Value) (ref, bool) { + s, ok := v.AsString() + if !ok { + return ref{}, false + } + + // Check if the string contains any variable references. + m := re.FindAllStringSubmatch(s, -1) + if len(m) == 0 { + return ref{}, false + } + + return ref{ + value: v, + str: s, + matches: m, + }, true +} + +// isPure returns true if the variable reference contains a single +// variable reference and nothing more. We need this so we can +// interpolate values of non-string types (i.e. it can be substituted). +func (v ref) isPure() bool { + // Need single match, equal to the incoming string. + if len(v.matches) == 0 || len(v.matches[0]) == 0 { + panic("invalid variable reference; expect at least one match") + } + return v.matches[0][0] == v.str +} + +func (v ref) references() []string { + var out []string + for _, m := range v.matches { + out = append(out, m[1]) + } + return out +} diff --git a/libs/dyn/dynvar/ref_test.go b/libs/dyn/dynvar/ref_test.go new file mode 100644 index 00000000..b3066276 --- /dev/null +++ b/libs/dyn/dynvar/ref_test.go @@ -0,0 +1,46 @@ +package dynvar + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewRefNoString(t *testing.T) { + _, ok := newRef(dyn.V(1)) + require.False(t, ok, "should not match non-string") +} + +func TestNewRefValidPattern(t *testing.T) { + for in, refs := range map[string][]string{ + "${hello_world.world_world}": {"hello_world.world_world"}, + "${helloworld.world-world}": {"helloworld.world-world"}, + "${hello-world.world-world}": {"hello-world.world-world"}, + } { + ref, ok := newRef(dyn.V(in)) + require.True(t, ok, "should match valid pattern: %s", in) + assert.Equal(t, refs, ref.references()) + } +} + +func TestNewRefInvalidPattern(t *testing.T) { + invalid := []string{ + "${hello_world-.world_world}", // the first segment ending must not end with hyphen (-) + "${hello_world-_.world_world}", // the first segment ending must not end with underscore (_) + "${helloworld.world-world-}", // second segment must not end with hyphen (-) + "${helloworld-.world-world}", // first segment must not end with hyphen (-) + "${helloworld.-world-world}", // second segment must not start with hyphen (-) + "${-hello-world.-world-world-}", // must not start or end with hyphen (-) + "${_-_._-_.id}", // cannot use _- in sequence + "${0helloworld.world-world}", // interpolated first section shouldn't start with number + "${helloworld.9world-world}", // interpolated second section shouldn't start with number + "${a-a.a-_a-a.id}", // fails because of -_ in the second segment + "${a-a.a--a-a.id}", // fails because of -- in the second segment + } + for _, v := range invalid { + _, ok := newRef(dyn.V(v)) + require.False(t, ok, "should not match invalid pattern: %s", v) + } +} diff --git a/libs/dyn/dynvar/resolve.go b/libs/dyn/dynvar/resolve.go new file mode 100644 index 00000000..b4e119b6 --- /dev/null +++ b/libs/dyn/dynvar/resolve.go @@ -0,0 +1,206 @@ +package dynvar + +import ( + "errors" + "fmt" + "slices" + "sort" + "strings" + + "github.com/databricks/cli/libs/dyn" + "golang.org/x/exp/maps" +) + +// Resolve resolves variable references in the given input value using the provided lookup function. +// It returns the resolved output value and any error encountered during the resolution process. +// +// For example, given the input value: +// +// { +// "a": "a", +// "b": "${a}", +// "c": "${b}${b}", +// } +// +// The output value will be: +// +// { +// "a": "a", +// "b": "a", +// "c": "aa", +// } +// +// If the input value contains a variable reference that cannot be resolved, an error is returned. +// If a cycle is detected in the variable references, an error is returned. +// If for some path the resolution function returns [ErrSkipResolution], the variable reference is left in place. +// This is useful when some variable references are not yet ready to be interpolated. +func Resolve(in dyn.Value, fn Lookup) (out dyn.Value, err error) { + return resolver{in: in, fn: fn}.run() +} + +type resolver struct { + in dyn.Value + fn Lookup + + refs map[string]ref + resolved map[string]dyn.Value +} + +func (r resolver) run() (out dyn.Value, err error) { + err = r.collectVariableReferences() + if err != nil { + return dyn.InvalidValue, err + } + + err = r.resolveVariableReferences() + if err != nil { + return dyn.InvalidValue, err + } + + out, err = r.replaceVariableReferences() + if err != nil { + return dyn.InvalidValue, err + } + + return out, nil +} + +func (r *resolver) collectVariableReferences() (err error) { + r.refs = make(map[string]ref) + + // First walk the input to gather all values with a variable reference. + _, err = dyn.Walk(r.in, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + ref, ok := newRef(v) + if !ok { + // Skip values without variable references. + return v, nil + } + + r.refs[p.String()] = ref + return v, nil + }) + + return err +} + +func (r *resolver) resolveVariableReferences() (err error) { + // Initialize map for resolved variables. + // We use this for memoization. + r.resolved = make(map[string]dyn.Value) + + // Resolve each variable reference (in order). + // We sort the keys here to ensure that we always resolve the same variable reference first. + // This is done such that the cycle detection error is deterministic. If we did not do this, + // we could enter the cycle at any point in the cycle and return varying errors. + keys := maps.Keys(r.refs) + sort.Strings(keys) + for _, key := range keys { + _, err := r.resolve(key, []string{key}) + if err != nil { + return err + } + } + + return nil +} + +func (r *resolver) resolve(key string, seen []string) (dyn.Value, error) { + // Check if we have already resolved this variable reference. + if v, ok := r.resolved[key]; ok { + return v, nil + } + + ref, ok := r.refs[key] + if !ok { + // Perform lookup in the input. + p, err := dyn.NewPathFromString(key) + if err != nil { + return dyn.InvalidValue, err + } + v, err := r.fn(p) + if err != nil && dyn.IsNoSuchKeyError(err) { + return dyn.InvalidValue, fmt.Errorf( + "reference does not exist: ${%s}", + key, + ) + } + return v, err + } + + // This is an unresolved variable reference. + deps := ref.references() + + // Resolve each of the dependencies, then interpolate them in the ref. + resolved := make([]dyn.Value, len(deps)) + complete := true + + for j, dep := range deps { + // Cycle detection. + if slices.Contains(seen, dep) { + return dyn.InvalidValue, fmt.Errorf( + "cycle detected in field resolution: %s", + strings.Join(append(seen, dep), " -> "), + ) + } + + v, err := r.resolve(dep, append(seen, dep)) + + // If we should skip resolution of this key, index j will hold an invalid [dyn.Value]. + if errors.Is(err, ErrSkipResolution) { + complete = false + continue + } else if err != nil { + // Otherwise, propagate the error. + return dyn.InvalidValue, err + } + + resolved[j] = v + } + + // Interpolate the resolved values. + if ref.isPure() && complete { + // If the variable reference is pure, we can substitute it. + // This is useful for interpolating values of non-string types. + r.resolved[key] = resolved[0] + return resolved[0], nil + } + + // Not pure; perform string interpolation. + for j := range ref.matches { + // The value is invalid if resolution returned [ErrSkipResolution]. + // We must skip those and leave the original variable reference in place. + if !resolved[j].IsValid() { + continue + } + + // Try to turn the resolved value into a string. + s, ok := resolved[j].AsString() + if !ok { + return dyn.InvalidValue, fmt.Errorf( + "cannot interpolate non-string value: %s", + ref.matches[j][0], + ) + } + + ref.str = strings.Replace(ref.str, ref.matches[j][0], s, 1) + } + + // Store the interpolated value. + v := dyn.NewValue(ref.str, ref.value.Location()) + r.resolved[key] = v + return v, nil +} + +func (r *resolver) replaceVariableReferences() (dyn.Value, error) { + // Walk the input and replace all variable references. + return dyn.Walk(r.in, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + nv, ok := r.resolved[p.String()] + if !ok { + // No variable reference; return the original value. + return v, nil + } + + // We have a variable reference; return the resolved value. + return nv, nil + }) +} diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go new file mode 100644 index 00000000..ba700503 --- /dev/null +++ b/libs/dyn/dynvar/resolve_test.go @@ -0,0 +1,184 @@ +package dynvar_test + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func getByPath(t *testing.T, v dyn.Value, path string) dyn.Value { + v, err := dyn.Get(v, path) + require.NoError(t, err) + return v +} + +func TestResolve(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("${a}"), + "c": dyn.V("${a}"), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + assert.Equal(t, "a", getByPath(t, out, "a").MustString()) + assert.Equal(t, "a", getByPath(t, out, "b").MustString()) + assert.Equal(t, "a", getByPath(t, out, "c").MustString()) +} + +func TestResolveNotFound(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "b": dyn.V("${a}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.ErrorContains(t, err, `reference does not exist: ${a}`) +} + +func TestResolveWithNesting(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("${f.a}"), + "f": dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("${f.a}"), + }), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + assert.Equal(t, "a", getByPath(t, out, "a").MustString()) + assert.Equal(t, "a", getByPath(t, out, "f.a").MustString()) + assert.Equal(t, "a", getByPath(t, out, "f.b").MustString()) +} + +func TestResolveWithRecursion(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("${a}"), + "c": dyn.V("${b}"), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + assert.Equal(t, "a", getByPath(t, out, "a").MustString()) + assert.Equal(t, "a", getByPath(t, out, "b").MustString()) + assert.Equal(t, "a", getByPath(t, out, "c").MustString()) +} + +func TestResolveWithRecursionLoop(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("${c}"), + "c": dyn.V("${d}"), + "d": dyn.V("${b}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + assert.ErrorContains(t, err, "cycle detected in field resolution: b -> c -> d -> b") +} + +func TestResolveWithRecursionLoopSelf(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("${a}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + assert.ErrorContains(t, err, "cycle detected in field resolution: a -> a") +} + +func TestResolveWithStringConcatenation(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("b"), + "c": dyn.V("${a}${b}${a}"), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + assert.Equal(t, "a", getByPath(t, out, "a").MustString()) + assert.Equal(t, "b", getByPath(t, out, "b").MustString()) + assert.Equal(t, "aba", getByPath(t, out, "c").MustString()) +} + +func TestResolveWithTypeRetentionFailure(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V(1), + "b": dyn.V(2), + "c": dyn.V("${a} ${b}"), + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.ErrorContains(t, err, "cannot interpolate non-string value: ${a}") +} + +func TestResolveWithTypeRetention(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "int": dyn.V(1), + "int_var": dyn.V("${int}"), + "bool_true": dyn.V(true), + "bool_true_var": dyn.V("${bool_true}"), + "bool_false": dyn.V(false), + "bool_false_var": dyn.V("${bool_false}"), + "float": dyn.V(1.0), + "float_var": dyn.V("${float}"), + "string": dyn.V("a"), + "string_var": dyn.V("${string}"), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + assert.EqualValues(t, 1, getByPath(t, out, "int").MustInt()) + assert.EqualValues(t, 1, getByPath(t, out, "int_var").MustInt()) + + assert.EqualValues(t, true, getByPath(t, out, "bool_true").MustBool()) + assert.EqualValues(t, true, getByPath(t, out, "bool_true_var").MustBool()) + + assert.EqualValues(t, false, getByPath(t, out, "bool_false").MustBool()) + assert.EqualValues(t, false, getByPath(t, out, "bool_false_var").MustBool()) + + assert.EqualValues(t, 1.0, getByPath(t, out, "float").MustFloat()) + assert.EqualValues(t, 1.0, getByPath(t, out, "float_var").MustFloat()) + + assert.EqualValues(t, "a", getByPath(t, out, "string").MustString()) + assert.EqualValues(t, "a", getByPath(t, out, "string_var").MustString()) +} + +func TestResolveWithSkip(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V("b"), + "c": dyn.V("${a}"), + "d": dyn.V("${b}"), + "e": dyn.V("${a} ${b}"), + "f": dyn.V("${b} ${a} ${a} ${b}"), + }) + + fallback := dynvar.DefaultLookup(in) + ignore := func(path dyn.Path) (dyn.Value, error) { + // If the variable reference to look up starts with "b", skip it. + if path.HasPrefix(dyn.NewPath(dyn.Key("b"))) { + return dyn.InvalidValue, dynvar.ErrSkipResolution + } + return fallback(path) + } + + out, err := dynvar.Resolve(in, ignore) + require.NoError(t, err) + + assert.Equal(t, "a", getByPath(t, out, "a").MustString()) + assert.Equal(t, "b", getByPath(t, out, "b").MustString()) + assert.Equal(t, "a", getByPath(t, out, "c").MustString()) + + // Check that the skipped variable references are not interpolated. + assert.Equal(t, "${b}", getByPath(t, out, "d").MustString()) + assert.Equal(t, "a ${b}", getByPath(t, out, "e").MustString()) + assert.Equal(t, "${b} a a ${b}", getByPath(t, out, "f").MustString()) +}