From a1297d71fd1646051612d7a4170394f93994b72c Mon Sep 17 00:00:00 2001
From: Pieter Noordhuis <pieter.noordhuis@databricks.com>
Date: Fri, 22 Dec 2023 11:38:09 +0100
Subject: [PATCH] Functionality to walk a `config.Value` tree (#1081)

## Changes

This change adds:
* A `config.Walk` function to walk a configuration tree
* A `config.Path` type to represent a value's path inside a tree
* Functions to create a `config.Path` from a string, or convert one to a
string

## Tests

Additional unit tests with full coverage.
---
 libs/config/path.go             |  96 ++++++++++++
 libs/config/path_string.go      |  89 +++++++++++
 libs/config/path_string_test.go | 100 +++++++++++++
 libs/config/path_test.go        |  76 ++++++++++
 libs/config/walk.go             |  66 +++++++++
 libs/config/walk_test.go        | 254 ++++++++++++++++++++++++++++++++
 6 files changed, 681 insertions(+)
 create mode 100644 libs/config/path.go
 create mode 100644 libs/config/path_string.go
 create mode 100644 libs/config/path_string_test.go
 create mode 100644 libs/config/path_test.go
 create mode 100644 libs/config/walk.go
 create mode 100644 libs/config/walk_test.go

diff --git a/libs/config/path.go b/libs/config/path.go
new file mode 100644
index 00000000..f1abf48c
--- /dev/null
+++ b/libs/config/path.go
@@ -0,0 +1,96 @@
+package config
+
+import (
+	"bytes"
+	"fmt"
+)
+
+type pathComponent struct {
+	key   string
+	index int
+}
+
+// Path represents a path to a value in a [Value] configuration tree.
+type Path []pathComponent
+
+// EmptyPath is the empty path.
+// It is defined for convenience and clarity.
+var EmptyPath = Path{}
+
+// Key returns a path component for a key.
+func Key(k string) pathComponent {
+	return pathComponent{key: k}
+}
+
+// Index returns a path component for an index.
+func Index(i int) pathComponent {
+	return pathComponent{index: i}
+}
+
+// NewPath returns a new path from the given components.
+// The individual components may be created with [Key] or [Index].
+func NewPath(cs ...pathComponent) Path {
+	return cs
+}
+
+// Join joins the given paths.
+func (p Path) Join(qs ...Path) Path {
+	for _, q := range qs {
+		p = p.Append(q...)
+	}
+	return p
+}
+
+// Append appends the given components to the path.
+func (p Path) Append(cs ...pathComponent) Path {
+	return append(p, cs...)
+}
+
+// Equal returns true if the paths are equal.
+func (p Path) Equal(q Path) bool {
+	pl := len(p)
+	ql := len(q)
+	if pl != ql {
+		return false
+	}
+	for i := 0; i < pl; i++ {
+		if p[i] != q[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// HasPrefix returns true if the path has the specified prefix.
+// The empty path is a prefix of all paths.
+func (p Path) HasPrefix(q Path) bool {
+	pl := len(p)
+	ql := len(q)
+	if pl < ql {
+		return false
+	}
+	for i := 0; i < ql; i++ {
+		if p[i] != q[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// String returns a string representation of the path.
+func (p Path) String() string {
+	var buf bytes.Buffer
+
+	for i, c := range p {
+		if i > 0 && c.key != "" {
+			buf.WriteRune('.')
+		}
+		if c.key != "" {
+			buf.WriteString(c.key)
+		} else {
+			buf.WriteString(fmt.Sprintf("[%d]", c.index))
+		}
+	}
+
+	return buf.String()
+}
diff --git a/libs/config/path_string.go b/libs/config/path_string.go
new file mode 100644
index 00000000..9538ad27
--- /dev/null
+++ b/libs/config/path_string.go
@@ -0,0 +1,89 @@
+package config
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// MustPathFromString is like NewPathFromString but panics on error.
+func MustPathFromString(input string) Path {
+	p, err := NewPathFromString(input)
+	if err != nil {
+		panic(err)
+	}
+	return p
+}
+
+// NewPathFromString parses a path from a string.
+//
+// The string must be a sequence of keys and indices separated by dots.
+// Indices must be enclosed in square brackets.
+// The string may include a leading dot.
+//
+// Examples:
+//   - foo.bar
+//   - foo[1].bar
+//   - foo.bar[1]
+//   - foo.bar[1][2]
+//   - .
+func NewPathFromString(input string) (Path, error) {
+	var path Path
+
+	p := input
+
+	// Trim leading dot.
+	if p != "" && p[0] == '.' {
+		p = p[1:]
+	}
+
+	for p != "" {
+		// Every component may have a leading dot.
+		if p != "" && p[0] == '.' {
+			p = p[1:]
+		}
+
+		if p == "" {
+			return nil, fmt.Errorf("invalid path: %s", input)
+		}
+
+		if p[0] == '[' {
+			// Find next ]
+			i := strings.Index(p, "]")
+			if i < 0 {
+				return nil, fmt.Errorf("invalid path: %s", input)
+			}
+
+			// Parse index
+			j, err := strconv.Atoi(p[1:i])
+			if err != nil {
+				return nil, fmt.Errorf("invalid path: %s", input)
+			}
+
+			// Append index
+			path = append(path, Index(j))
+			p = p[i+1:]
+
+			// The next character must be a . or [
+			if p != "" && strings.IndexAny(p, ".[") != 0 {
+				return nil, fmt.Errorf("invalid path: %s", input)
+			}
+		} else {
+			// Find next . or [
+			i := strings.IndexAny(p, ".[")
+			if i < 0 {
+				i = len(p)
+			}
+
+			if i == 0 {
+				return nil, fmt.Errorf("invalid path: %s", input)
+			}
+
+			// Append key
+			path = append(path, Key(p[:i]))
+			p = p[i:]
+		}
+	}
+
+	return path, nil
+}
diff --git a/libs/config/path_string_test.go b/libs/config/path_string_test.go
new file mode 100644
index 00000000..89e64561
--- /dev/null
+++ b/libs/config/path_string_test.go
@@ -0,0 +1,100 @@
+package config_test
+
+import (
+	"fmt"
+	"testing"
+
+	. "github.com/databricks/cli/libs/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNewPathFromString(t *testing.T) {
+	for _, tc := range []struct {
+		input  string
+		output Path
+		err    error
+	}{
+		{
+			input:  "",
+			output: NewPath(),
+		},
+		{
+			input:  ".",
+			output: NewPath(),
+		},
+		{
+			input:  "foo.bar",
+			output: NewPath(Key("foo"), Key("bar")),
+		},
+		{
+			input:  "[1]",
+			output: NewPath(Index(1)),
+		},
+		{
+			input:  "foo[1].bar",
+			output: NewPath(Key("foo"), Index(1), Key("bar")),
+		},
+		{
+			input:  "foo.bar[1]",
+			output: NewPath(Key("foo"), Key("bar"), Index(1)),
+		},
+		{
+			input:  "foo.bar[1][2]",
+			output: NewPath(Key("foo"), Key("bar"), Index(1), Index(2)),
+		},
+		{
+			input:  "foo.bar[1][2][3]",
+			output: NewPath(Key("foo"), Key("bar"), Index(1), Index(2), Index(3)),
+		},
+		{
+			input:  "foo[1234]",
+			output: NewPath(Key("foo"), Index(1234)),
+		},
+		{
+			input: "foo[123",
+			err:   fmt.Errorf("invalid path: foo[123"),
+		},
+		{
+			input: "foo[123]]",
+			err:   fmt.Errorf("invalid path: foo[123]]"),
+		},
+		{
+			input: "foo[[123]",
+			err:   fmt.Errorf("invalid path: foo[[123]"),
+		},
+		{
+			input: "foo[[123]]",
+			err:   fmt.Errorf("invalid path: foo[[123]]"),
+		},
+		{
+			input: "foo[foo]",
+			err:   fmt.Errorf("invalid path: foo[foo]"),
+		},
+		{
+			input: "foo..bar",
+			err:   fmt.Errorf("invalid path: foo..bar"),
+		},
+		{
+			input: "foo.bar.",
+			err:   fmt.Errorf("invalid path: foo.bar."),
+		},
+		{
+			// Every component may have a leading dot.
+			input:  ".foo.[1].bar",
+			output: NewPath(Key("foo"), Index(1), Key("bar")),
+		},
+		{
+			// But after an index there must be a dot.
+			input: "foo[1]bar",
+			err:   fmt.Errorf("invalid path: foo[1]bar"),
+		},
+	} {
+		p, err := NewPathFromString(tc.input)
+		if tc.err != nil {
+			assert.EqualError(t, err, tc.err.Error(), tc.input)
+		} else {
+			assert.NoError(t, err)
+			assert.Equal(t, tc.output, p)
+		}
+	}
+}
diff --git a/libs/config/path_test.go b/libs/config/path_test.go
new file mode 100644
index 00000000..3fdd848e
--- /dev/null
+++ b/libs/config/path_test.go
@@ -0,0 +1,76 @@
+package config_test
+
+import (
+	"testing"
+
+	"github.com/databricks/cli/libs/config"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestPathAppend(t *testing.T) {
+	p := config.NewPath(config.Key("foo"))
+
+	// Single arg.
+	p1 := p.Append(config.Key("bar"))
+	assert.True(t, p1.Equal(config.NewPath(config.Key("foo"), config.Key("bar"))))
+
+	// Multiple args.
+	p2 := p.Append(config.Key("bar"), config.Index(1))
+	assert.True(t, p2.Equal(config.NewPath(config.Key("foo"), config.Key("bar"), config.Index(1))))
+}
+
+func TestPathJoin(t *testing.T) {
+	p := config.NewPath(config.Key("foo"))
+
+	// Single arg.
+	p1 := p.Join(config.NewPath(config.Key("bar")))
+	assert.True(t, p1.Equal(config.NewPath(config.Key("foo"), config.Key("bar"))))
+
+	// Multiple args.
+	p2 := p.Join(config.NewPath(config.Key("bar")), config.NewPath(config.Index(1)))
+	assert.True(t, p2.Equal(config.NewPath(config.Key("foo"), config.Key("bar"), config.Index(1))))
+}
+
+func TestPathEqualEmpty(t *testing.T) {
+	assert.True(t, config.EmptyPath.Equal(config.EmptyPath))
+}
+
+func TestPathEqual(t *testing.T) {
+	p1 := config.NewPath(config.Key("foo"), config.Index(1))
+	p2 := config.NewPath(config.Key("bar"), config.Index(2))
+	assert.False(t, p1.Equal(p2), "expected %q to not equal %q", p1, p2)
+
+	p3 := config.NewPath(config.Key("foo"), config.Index(1))
+	assert.True(t, p1.Equal(p3), "expected %q to equal %q", p1, p3)
+
+	p4 := config.NewPath(config.Key("foo"), config.Index(1), config.Key("bar"), config.Index(2))
+	assert.False(t, p1.Equal(p4), "expected %q to not equal %q", p1, p4)
+}
+
+func TestPathHasPrefixEmpty(t *testing.T) {
+	empty := config.EmptyPath
+	nonEmpty := config.NewPath(config.Key("foo"))
+	assert.True(t, empty.HasPrefix(empty))
+	assert.True(t, nonEmpty.HasPrefix(empty))
+	assert.False(t, empty.HasPrefix(nonEmpty))
+}
+
+func TestPathHasPrefix(t *testing.T) {
+	p1 := config.NewPath(config.Key("foo"), config.Index(1))
+	p2 := config.NewPath(config.Key("bar"), config.Index(2))
+	assert.False(t, p1.HasPrefix(p2), "expected %q to not have prefix %q", p1, p2)
+
+	p3 := config.NewPath(config.Key("foo"))
+	assert.True(t, p1.HasPrefix(p3), "expected %q to have prefix %q", p1, p3)
+}
+
+func TestPathString(t *testing.T) {
+	p1 := config.NewPath(config.Key("foo"), config.Index(1))
+	assert.Equal(t, "foo[1]", p1.String())
+
+	p2 := config.NewPath(config.Key("bar"), config.Index(2), config.Key("baz"))
+	assert.Equal(t, "bar[2].baz", p2.String())
+
+	p3 := config.NewPath(config.Key("foo"), config.Index(1), config.Key("bar"), config.Index(2), config.Key("baz"))
+	assert.Equal(t, "foo[1].bar[2].baz", p3.String())
+}
diff --git a/libs/config/walk.go b/libs/config/walk.go
new file mode 100644
index 00000000..ce058338
--- /dev/null
+++ b/libs/config/walk.go
@@ -0,0 +1,66 @@
+package config
+
+import "errors"
+
+// WalkValueFunc is the type of the function called by Walk to traverse the configuration tree.
+type WalkValueFunc func(p Path, v Value) (Value, error)
+
+// ErrDrop may be returned by WalkValueFunc to remove a value from the subtree.
+var ErrDrop = errors.New("drop value from subtree")
+
+// ErrSkip may be returned by WalkValueFunc to skip traversal of a subtree.
+var ErrSkip = errors.New("skip traversal of subtree")
+
+// Walk walks the configuration tree and calls the given function on each node.
+// The callback may return ErrDrop to remove a value from the subtree.
+// The callback may return ErrSkip to skip traversal of a subtree.
+// If the callback returns another error, the walk is aborted, and the error is returned.
+func Walk(v Value, fn func(p Path, v Value) (Value, error)) (Value, error) {
+	return walk(v, EmptyPath, fn)
+}
+
+// Unexported counterpart to Walk.
+// It carries the path leading up to the current node,
+// such that it can be passed to the WalkValueFunc.
+func walk(v Value, p Path, fn func(p Path, v Value) (Value, error)) (Value, error) {
+	v, err := fn(p, v)
+	if err != nil {
+		if err == ErrSkip {
+			return v, nil
+		}
+		return NilValue, err
+	}
+
+	switch v.Kind() {
+	case KindMap:
+		m := v.MustMap()
+		out := make(map[string]Value, len(m))
+		for k := range m {
+			nv, err := walk(m[k], p.Append(Key(k)), fn)
+			if err == ErrDrop {
+				continue
+			}
+			if err != nil {
+				return NilValue, err
+			}
+			out[k] = nv
+		}
+		v.v = out
+	case KindSequence:
+		s := v.MustSequence()
+		out := make([]Value, 0, len(s))
+		for i := range s {
+			nv, err := walk(s[i], p.Append(Index(i)), fn)
+			if err == ErrDrop {
+				continue
+			}
+			if err != nil {
+				return NilValue, err
+			}
+			out = append(out, nv)
+		}
+		v.v = out
+	}
+
+	return v, nil
+}
diff --git a/libs/config/walk_test.go b/libs/config/walk_test.go
new file mode 100644
index 00000000..806ca256
--- /dev/null
+++ b/libs/config/walk_test.go
@@ -0,0 +1,254 @@
+package config_test
+
+import (
+	"errors"
+	"testing"
+
+	. "github.com/databricks/cli/libs/config"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// Return values for specific paths.
+type walkReturn struct {
+	path Path
+
+	// Return values.
+	fn  func(Value) Value
+	err error
+}
+
+// Track the calls to the callback.
+type walkCall struct {
+	path  Path
+	value Value
+}
+
+// Track the calls to the callback.
+type walkCallTracker struct {
+	returns []walkReturn
+	calls   []walkCall
+}
+
+func (w *walkCallTracker) on(path string, fn func(Value) Value, err error) {
+	w.returns = append(w.returns, walkReturn{MustPathFromString(path), fn, err})
+}
+
+func (w *walkCallTracker) returnSkip(path string) {
+	w.on(path, func(v Value) Value { return v }, ErrSkip)
+}
+
+func (w *walkCallTracker) returnDrop(path string) {
+	w.on(path, func(v Value) Value { return NilValue }, ErrDrop)
+}
+
+func (w *walkCallTracker) track(p Path, v Value) (Value, error) {
+	w.calls = append(w.calls, walkCall{p, v})
+
+	// Look for matching return.
+	for _, r := range w.returns {
+		if p.Equal(r.path) {
+			return r.fn(v), r.err
+		}
+	}
+
+	return v, nil
+}
+
+func TestWalkEmpty(t *testing.T) {
+	var tracker walkCallTracker
+
+	value := V(nil)
+	out, err := Walk(value, tracker.track)
+	require.NoError(t, err)
+	assert.Equal(t, value, out)
+
+	// The callback should have been called once.
+	assert.Len(t, tracker.calls, 1)
+
+	// The call should have been made with the empty path.
+	assert.Equal(t, EmptyPath, tracker.calls[0].path)
+
+	// The value should be the same as the input.
+	assert.Equal(t, value, tracker.calls[0].value)
+}
+
+func TestWalkMapSkip(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Skip traversal of the root value.
+	tracker.returnSkip(".")
+
+	value := V(map[string]Value{
+		"key": V("value"),
+	})
+	out, err := Walk(value, tracker.track)
+	require.NoError(t, err)
+	assert.Equal(
+		t,
+		V(map[string]Value{
+			"key": V("value"),
+		}),
+		out,
+	)
+
+	// The callback should have been called once.
+	assert.Len(t, tracker.calls, 1)
+
+	// The call should have been made with the empty path.
+	assert.Equal(t, EmptyPath, tracker.calls[0].path)
+
+	// The value should be the same as the input.
+	assert.Equal(t, value, tracker.calls[0].value)
+}
+
+func TestWalkMapDrop(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Drop the value at key "foo".
+	tracker.returnDrop(".foo")
+
+	value := V(map[string]Value{
+		"foo": V("bar"),
+		"bar": V("baz"),
+	})
+	out, err := Walk(value, tracker.track)
+	require.NoError(t, err)
+	assert.Equal(
+		t,
+		V(map[string]Value{
+			"bar": V("baz"),
+		}),
+		out,
+	)
+
+	// The callback should have been called for the root and every key in the map.
+	assert.Len(t, tracker.calls, 3)
+
+	// Calls 2 and 3 have been made for the keys in the map.
+	assert.ElementsMatch(t,
+		[]Path{
+			tracker.calls[1].path,
+			tracker.calls[2].path,
+		}, []Path{
+			MustPathFromString(".foo"),
+			MustPathFromString(".bar"),
+		})
+}
+
+func TestWalkMapError(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Return an error from the callback for key "foo".
+	cerr := errors.New("error!")
+	tracker.on(".foo", func(v Value) Value { return v }, cerr)
+
+	value := V(map[string]Value{
+		"foo": V("bar"),
+	})
+	out, err := Walk(value, tracker.track)
+	assert.Equal(t, cerr, err)
+	assert.Equal(t, NilValue, out)
+
+	// The callback should have been called twice.
+	assert.Len(t, tracker.calls, 2)
+
+	// The second call was for the value at key "foo".
+	assert.Equal(t, MustPathFromString(".foo"), tracker.calls[1].path)
+}
+
+func TestWalkSequenceSkip(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Skip traversal of the root value.
+	tracker.returnSkip(".")
+
+	value := V([]Value{
+		V("foo"),
+		V("bar"),
+	})
+	out, err := Walk(value, tracker.track)
+	require.NoError(t, err)
+	assert.Equal(
+		t,
+		V([]Value{
+			V("foo"),
+			V("bar"),
+		}),
+		out,
+	)
+
+	// The callback should have been called once.
+	assert.Len(t, tracker.calls, 1)
+
+	// The call should have been made with the empty path.
+	assert.Equal(t, EmptyPath, tracker.calls[0].path)
+
+	// The value should be the same as the input.
+	assert.Equal(t, value, tracker.calls[0].value)
+}
+
+func TestWalkSequenceDrop(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Drop the value at index 1.
+	tracker.returnDrop(".[1]")
+
+	value := V([]Value{
+		V("foo"),
+		V("bar"),
+		V("baz"),
+	})
+	out, err := Walk(value, tracker.track)
+	require.NoError(t, err)
+	assert.Equal(
+		t,
+		V([]Value{
+			V("foo"),
+			V("baz"),
+		}),
+		out,
+	)
+
+	// The callback should have been called for the root and every value in the sequence.
+	assert.Len(t, tracker.calls, 4)
+
+	// The second call was for the value at index 0.
+	assert.Equal(t, MustPathFromString(".[0]"), tracker.calls[1].path)
+	assert.Equal(t, V("foo"), tracker.calls[1].value)
+
+	// The third call was for the value at index 1.
+	assert.Equal(t, MustPathFromString(".[1]"), tracker.calls[2].path)
+	assert.Equal(t, V("bar"), tracker.calls[2].value)
+
+	// The fourth call was for the value at index 2.
+	assert.Equal(t, MustPathFromString(".[2]"), tracker.calls[3].path)
+	assert.Equal(t, V("baz"), tracker.calls[3].value)
+}
+
+func TestWalkSequenceError(t *testing.T) {
+	var tracker walkCallTracker
+
+	// Return an error from the callback for index 1.
+	cerr := errors.New("error!")
+	tracker.on(".[1]", func(v Value) Value { return v }, cerr)
+
+	value := V([]Value{
+		V("foo"),
+		V("bar"),
+	})
+	out, err := Walk(value, tracker.track)
+	assert.Equal(t, cerr, err)
+	assert.Equal(t, NilValue, out)
+
+	// The callback should have been called three times.
+	assert.Len(t, tracker.calls, 3)
+
+	// The second call was for the value at index 0.
+	assert.Equal(t, MustPathFromString(".[0]"), tracker.calls[1].path)
+	assert.Equal(t, V("foo"), tracker.calls[1].value)
+
+	// The third call was for the value at index 1.
+	assert.Equal(t, MustPathFromString(".[1]"), tracker.calls[2].path)
+	assert.Equal(t, V("bar"), tracker.calls[2].value)
+}