New YAML loader to support configuration location (#828)

## Changes

In order to support variable interpolation on fields that aren't
strings in the resource types, we need a separate representation of the
bundle configuration tree with the type equivalent of Go's `any`. But
instead of using `any` directly, we can do better and use a custom type
equivalent to `any` that captures additional metadata. In this PR, the
additional metadata is limited to the origin of the configuration value
(file, line number, and column).

The YAML loader in this commit uses the upstream YAML parser's `yaml.Node` type
to get access to location information. It reimplements the loader that
takes the `yaml.Node` structure and turns it into the configuration tree
we need.

Next steps after this PR:
* Implement configuration tree type checking (against a Go type)
* Implement configuration tree merging (to replace the current merge
functionality)
* Implement conversion to and from the bundle configuration struct
* Perform variable interpolation against this configuration tree (to
support variable interpolation for ints)
* (later) Implement a `jsonloader` that produces the same tree and
includes location information

## Tests

The tests in `yamlloader` perform an equality check on the untyped
output of loading a YAML file between the upstream YAML loader and this
loader. The YAML examples were generated by prompting ChatGPT for
examples that showcase anchors, primitive values, edge cases, etc.
This commit is contained in:
Pieter Noordhuis 2023-10-20 14:56:59 +02:00 committed by GitHub
parent 7b1d972b33
commit ab05f8e6e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 876 additions and 1 deletions

3
go.mod
View File

@ -31,6 +31,8 @@ require (
gopkg.in/ini.v1 v1.67.0 // Apache 2.0
)
require gopkg.in/yaml.v3 v3.0.1
require (
cloud.google.com/go/compute v1.23.0 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
@ -60,5 +62,4 @@ require (
google.golang.org/grpc v1.58.2 // indirect
google.golang.org/protobuf v1.31.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

13
libs/config/location.go Normal file
View File

@ -0,0 +1,13 @@
package config
import "fmt"
// Location identifies the position in a file that a configuration value
// was read from.
type Location struct {
	// File is the path of the file containing the value.
	File string
	// Line is the line number of the value within File.
	Line int
	// Column is the column number of the value within File.
	Column int
}

// String renders the location as "file:line:column".
func (loc Location) String() string {
	file, line, column := loc.File, loc.Line, loc.Column
	return fmt.Sprintf("%s:%d:%d", file, line, column)
}

View File

@ -0,0 +1,13 @@
package config_test
import (
"testing"
"github.com/databricks/cli/libs/config"
"github.com/stretchr/testify/assert"
)
// TestLocation checks the "file:line:column" rendering of a Location.
func TestLocation(t *testing.T) {
	loc := config.Location{
		File:   "file",
		Line:   1,
		Column: 2,
	}

	got := loc.String()
	assert.Equal(t, "file:1:2", got)
}

110
libs/config/value.go Normal file
View File

@ -0,0 +1,110 @@
package config
import "time"
// Value is a node in the configuration tree. It pairs an untyped value
// with the location it was constructed with and an anchor flag.
type Value struct {
// v is the underlying untyped value.
v any
// l is the location this value was constructed with.
l Location
// Whether or not this value is an anchor.
// If this node doesn't map to a type, we don't need to warn about it.
anchor bool
}
// NilValue is equal to the zero-value of Value.
// The accessors on Value return it when a lookup cannot be satisfied.
var NilValue = Value{}
// NewValue wraps v in a Value that records loc as its location.
// The anchor flag of the returned value is left unset.
func NewValue(v any, loc Location) Value {
	var out Value
	out.v = v
	out.l = loc
	return out
}
// AsMap returns the underlying value as a map from string keys to values.
// The boolean result is false (and the map nil) if the underlying value
// is not a map.
func (v Value) AsMap() (map[string]Value, bool) {
	if m, isMap := v.v.(map[string]Value); isMap {
		return m, true
	}
	return nil, false
}
// Location returns the location that was recorded for this value.
// For the zero value this is the zero Location.
func (v Value) Location() Location {
return v.l
}
// AsAny strips all metadata and returns the plain Go representation of the
// value: maps become map[string]any, sequences become []any (both converted
// recursively), and scalars are returned unchanged.
//
// It panics if the underlying value has a type that is not listed below.
func (v Value) AsAny() any {
	switch underlying := v.v.(type) {
	case nil:
		return nil
	case string, bool, int, int32, int64, float32, float64, time.Time:
		// Scalars carry no nested metadata; hand them back as they are.
		return underlying
	case []Value:
		out := make([]any, 0, len(underlying))
		for _, elem := range underlying {
			out = append(out, elem.AsAny())
		}
		return out
	case map[string]Value:
		out := make(map[string]any, len(underlying))
		for key, elem := range underlying {
			out[key] = elem.AsAny()
		}
		return out
	default:
		// Panic because we only want to deal with known types.
		panic("not handled")
	}
}
// Get returns the value stored under key if this value is a map.
// It returns NilValue if this value is not a map or the key is absent.
func (v Value) Get(key string) Value {
	if m, isMap := v.AsMap(); isMap {
		if child, found := m[key]; found {
			return child
		}
	}
	return NilValue
}
// Index returns the i-th element if this value is a sequence.
// It returns NilValue if this value is not a sequence or i is out of range.
func (v Value) Index(i int) Value {
	elems, isSeq := v.v.([]Value)
	if isSeq && i >= 0 && i < len(elems) {
		return elems[i]
	}
	return NilValue
}
// MarkAnchor returns a copy of this value with the anchor flag set.
// The receiver itself is not modified.
func (v Value) MarkAnchor() Value {
	// v is already a copy because of the value receiver.
	marked := v
	marked.anchor = true
	return marked
}
// IsAnchor reports whether this value has been marked as an anchor
// (see MarkAnchor).
func (v Value) IsAnchor() bool {
return v.anchor
}

37
libs/config/value_test.go Normal file
View File

@ -0,0 +1,37 @@
package config_test
import (
"testing"
"github.com/databricks/cli/libs/config"
"github.com/stretchr/testify/assert"
)
// TestValueIsAnchor checks that the anchor flag is unset on the zero value
// and set on the value returned by MarkAnchor.
func TestValueIsAnchor(t *testing.T) {
	plain := config.Value{}
	assert.False(t, plain.IsAnchor())

	anchored := plain.MarkAnchor()
	assert.True(t, anchored.IsAnchor())
}
// TestValueAsMap checks that AsMap only succeeds for values that hold a map.
func TestValueAsMap(t *testing.T) {
	loc := config.Location{File: "file", Line: 1, Column: 2}

	// The zero value does not hold a map.
	zeroValue := config.Value{}
	m, ok := zeroValue.AsMap()
	assert.False(t, ok)
	assert.Nil(t, m)

	// Neither does a scalar.
	intValue := config.NewValue(1, config.Location{})
	m, ok = intValue.AsMap()
	assert.False(t, ok)
	assert.Nil(t, m)

	// A map value is returned as is.
	entries := map[string]config.Value{
		"key": config.NewValue("value", loc),
	}
	mapValue := config.NewValue(entries, loc)
	m, ok = mapValue.AsMap()
	assert.True(t, ok)
	assert.Len(t, m, 1)
}

View File

@ -0,0 +1,227 @@
package yamlloader
import (
"fmt"
"math"
"strconv"
"strings"
"time"
"github.com/databricks/cli/libs/config"
"gopkg.in/yaml.v3"
)
// loader converts a yaml.Node tree into a config.Value tree.
type loader struct {
// path is recorded as the File of every location the loader produces.
path string
}
// errorf builds an error whose message is prefixed with "yaml (<location>)"
// followed by the message formatted from format and args.
func errorf(loc config.Location, format string, args ...interface{}) error {
	msg := fmt.Sprintf(format, args...)
	return fmt.Errorf("yaml (%s): %s", loc, msg)
}
// newLoader returns a loader that attributes every location to path.
func newLoader(path string) *loader {
	l := new(loader)
	l.path = path
	return l
}
// location returns the config.Location of node: the loader's path combined
// with the line and column recorded on the node.
func (d *loader) location(node *yaml.Node) config.Location {
	var loc config.Location
	loc.File = d.path
	loc.Line, loc.Column = node.Line, node.Column
	return loc
}
// load converts node into a config.Value by dispatching on the node kind.
// It returns an error for node kinds it does not know about, or when the
// kind-specific loader fails.
func (d *loader) load(node *yaml.Node) (config.Value, error) {
	loc := d.location(node)

	var (
		out config.Value
		err error
	)
	switch node.Kind {
	case yaml.DocumentNode:
		out, err = d.loadDocument(node, loc)
	case yaml.SequenceNode:
		out, err = d.loadSequence(node, loc)
	case yaml.MappingNode:
		out, err = d.loadMapping(node, loc)
	case yaml.ScalarNode:
		out, err = d.loadScalar(node, loc)
	case yaml.AliasNode:
		out, err = d.loadAlias(node, loc)
	default:
		return config.NilValue, errorf(loc, "unknown node kind: %v", node.Kind)
	}
	if err != nil {
		return out, err
	}

	// Nodes without an anchor are returned as loaded.
	if node.Anchor == "" {
		return out, nil
	}

	// Mark value as anchor.
	// If this node doesn't map to a type, we don't need to warn about it.
	return out.MarkAnchor(), nil
}
// loadDocument loads the first content node of a document node.
// The loc argument is unused; the returned value carries the location of
// the content node.
func (d *loader) loadDocument(node *yaml.Node, loc config.Location) (config.Value, error) {
	root := node.Content[0]
	return d.load(root)
}
// loadSequence loads every element of a sequence node, in order, and
// returns them as a []config.Value located at loc. The first element that
// fails to load aborts the conversion.
func (d *loader) loadSequence(node *yaml.Node, loc config.Location) (config.Value, error) {
	elems := make([]config.Value, 0, len(node.Content))
	for _, child := range node.Content {
		elem, err := d.load(child)
		if err != nil {
			return config.NilValue, err
		}
		elems = append(elems, elem)
	}
	return config.NewValue(elems, loc), nil
}
// loadMapping converts a YAML mapping node into a map[string]config.Value
// located at loc.
//
// Keys must be string scalars. The merge key ("<<") is resolved after all
// regular entries have been loaded. Its value may be a mapping, an alias to
// a mapping, or a sequence of those. Precedence follows the YAML merge key
// specification:
//
//   - entries defined directly in this mapping override merged entries;
//   - within a merge sequence, earlier mappings override later ones.
//
// An error is returned if a key is not a string scalar, if the mapping
// contains more than one merge key, if the merge value does not resolve to
// maps, or if any nested value fails to load.
func (d *loader) loadMapping(node *yaml.Node, loc config.Location) (config.Value, error) {
	var merge *yaml.Node

	acc := make(map[string]config.Value)
	for i := 0; i < len(node.Content); i += 2 {
		key := node.Content[i]
		val := node.Content[i+1]

		// Assert that keys are strings
		if key.Kind != yaml.ScalarNode {
			return config.NilValue, errorf(loc, "key is not a scalar")
		}

		st := key.ShortTag()
		switch st {
		case "!!str":
			// OK
		case "!!merge":
			if merge != nil {
				// A repeated merge key is a mistake in the input file, not a
				// programmer error, so report it instead of panicking.
				return config.NilValue, errorf(d.location(key), "mapping contains more than one merge key")
			}
			merge = val
			continue
		default:
			return config.NilValue, errorf(loc, "invalid key tag: %v", st)
		}

		v, err := d.load(val)
		if err != nil {
			return config.NilValue, err
		}

		acc[key.Value] = v
	}

	if merge == nil {
		return config.NewValue(acc, loc), nil
	}

	// Build location for the merge node.
	mloc := d.location(merge)
	merr := errorf(mloc, "map merge requires map or sequence of maps as the value")

	// Flatten the merge node into a slice of nodes.
	// It can be either a single node or a sequence of nodes.
	var mnodes []*yaml.Node
	switch merge.Kind {
	case yaml.SequenceNode:
		mnodes = merge.Content
	case yaml.AliasNode, yaml.MappingNode:
		// An inline mapping is as valid a merge source as an alias to one.
		mnodes = []*yaml.Node{merge}
	default:
		return config.NilValue, merr
	}

	// Load the merge sources in document order so that the first invalid
	// source is the one that gets reported.
	seq := make([]map[string]config.Value, 0, len(mnodes))
	for _, n := range mnodes {
		v, err := d.load(n)
		if err != nil {
			return config.NilValue, err
		}
		m, ok := v.AsMap()
		if !ok {
			return config.NilValue, merr
		}
		seq = append(seq, m)
	}

	// Apply the merge sources back to front: a later write wins, so this
	// gives earlier sources precedence over later ones.
	out := make(map[string]config.Value)
	for i := len(seq) - 1; i >= 0; i-- {
		for k, v := range seq[i] {
			out[k] = v
		}
	}

	// The entries that we already accumulated have precedence over
	// everything that was merged in.
	for k, v := range acc {
		out[k] = v
	}

	return config.NewValue(out, loc), nil
}
// loadScalar converts a YAML scalar node into a config.Value located at
// loc, based on the node's short tag:
//
//   - !!str       -> string
//   - !!bool      -> bool ("true"/"false", case-insensitive)
//   - !!int       -> int if the value fits in 32 bits, int64 otherwise
//   - !!float     -> float64
//   - !!null      -> nil
//   - !!timestamp -> time.Time
//
// Integers may use underscores as digit separators and the base prefixes
// understood by strconv.ParseInt with base 0 (0x, 0o, 0b, leading 0).
// Floats may use underscores as digit separators and the YAML spellings of
// infinity and not-a-number (.inf, -.inf, .nan and their case variants).
//
// An error is returned for values that cannot be parsed as their tag
// demands and for tags that are not listed above.
func (d *loader) loadScalar(node *yaml.Node, loc config.Location) (config.Value, error) {
	st := node.ShortTag()
	switch st {
	case "!!str":
		return config.NewValue(node.Value, loc), nil
	case "!!bool":
		switch strings.ToLower(node.Value) {
		case "true":
			return config.NewValue(true, loc), nil
		case "false":
			return config.NewValue(false, loc), nil
		default:
			return config.NilValue, errorf(loc, "invalid bool value: %v", node.Value)
		}
	case "!!int":
		// Drop digit separators and let ParseInt infer the base from the
		// prefix, so that values such as 0x1F or 1_000 are accepted.
		plain := strings.ReplaceAll(node.Value, "_", "")
		i64, err := strconv.ParseInt(plain, 0, 64)
		if err != nil {
			return config.NilValue, errorf(loc, "invalid int value: %v", node.Value)
		}
		// Use regular int type instead of int64 if possible.
		if i64 >= math.MinInt32 && i64 <= math.MaxInt32 {
			return config.NewValue(int(i64), loc), nil
		}
		return config.NewValue(i64, loc), nil
	case "!!float":
		// strconv.ParseFloat does not understand the YAML spellings of the
		// special values, so map those explicitly.
		switch node.Value {
		case ".inf", ".Inf", ".INF", "+.inf", "+.Inf", "+.INF":
			return config.NewValue(math.Inf(1), loc), nil
		case "-.inf", "-.Inf", "-.INF":
			return config.NewValue(math.Inf(-1), loc), nil
		case ".nan", ".NaN", ".NAN":
			return config.NewValue(math.NaN(), loc), nil
		}
		// Drop digit separators; ParseFloat rejects them in decimal input.
		plain := strings.ReplaceAll(node.Value, "_", "")
		f64, err := strconv.ParseFloat(plain, 64)
		if err != nil {
			return config.NilValue, errorf(loc, "invalid float value: %v", node.Value)
		}
		return config.NewValue(f64, loc), nil
	case "!!null":
		return config.NewValue(nil, loc), nil
	case "!!timestamp":
		// Try a couple of layouts
		for _, layout := range []string{
			"2006-1-2T15:4:5.999999999Z07:00", // RFC3339Nano with short date fields.
			"2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t".
			"2006-1-2 15:4:5.999999999",       // space separated with no time zone
			"2006-1-2",                        // date only
		} {
			t, terr := time.Parse(layout, node.Value)
			if terr == nil {
				return config.NewValue(t, loc), nil
			}
		}
		return config.NilValue, errorf(loc, "invalid timestamp value: %v", node.Value)
	default:
		return config.NilValue, errorf(loc, "unknown tag: %v", st)
	}
}
// loadAlias loads the node that an alias node refers to.
// The loc argument is unused; the returned value carries the location of
// the aliased node rather than of the alias itself.
func (d *loader) loadAlias(node *yaml.Node, loc config.Location) (config.Value, error) {
	target := node.Alias
	return d.load(target)
}

View File

@ -0,0 +1,12 @@
# 1. Basic Anchor and Alias
defaults: &DEFAULTS
color: red
size: large
shirt1:
<<: *DEFAULTS
pattern: striped
shirt2:
<<: *DEFAULTS
pattern: plain

View File

@ -0,0 +1,13 @@
# 2. Merging Anchors
# Here, multiple anchors can be merged into a single item.
defaults: &DEFAULTS
color: red
size: large
materials: &MATERIALS
primary: cotton
secondary: polyester
shirt:
<<: [*DEFAULTS, *MATERIALS]
pattern: striped

View File

@ -0,0 +1,10 @@
# 3. Overriding Merged Anchor Values
# You can override values when merging.
defaults: &DEFAULTS
color: red
size: large
pattern: plain
shirt:
<<: *DEFAULTS
color: blue

View File

@ -0,0 +1,16 @@
# 4. Nested Anchors
# This demonstrates the reuse of nested content.
address: &ADDRESS
city: San Francisco
state: CA
country: USA
person1:
name: Alice
address: *ADDRESS
person2:
name: Bob
address:
<<: *ADDRESS
city: Los Angeles

View File

@ -0,0 +1,15 @@
# 5. Using Anchors for List Items
# You can use anchors for list items too.
features: &FEATURES
- wifi
- bluetooth
phone1:
name: PhoneA
features: *FEATURES
phone2:
name: PhoneB
features:
- camera
- *FEATURES

View File

@ -0,0 +1,5 @@
# 6. String Anchors
commonMessage: &msg "Hello, World!"
greeting1: *msg
greeting2: *msg

View File

@ -0,0 +1,10 @@
# 7. Number Anchors
defaultAge: &age 25
person1:
name: Alice
age: *age
person2:
name: Bob
age: *age

View File

@ -0,0 +1,10 @@
# 8. Boolean Anchors
isActive: &active true
user1:
username: user1
active: *active
user2:
username: user2
active: *active

View File

@ -0,0 +1,6 @@
# Use string anchor to extend a mapping.
str: &str "Hello world!"
map:
<<: *str
key: value

View File

@ -0,0 +1,6 @@
# Use string anchor inside sequence to extend a mapping.
str: &str "Hello world!"
map:
<<: [*str]
key: value

View File

@ -0,0 +1,4 @@
# Extend a mapping with a literal string.
map:
<<: value
key: value

View File

@ -0,0 +1,90 @@
# Scalars
# Strings can be unquoted
name: John Doe
# Strings can be single quoted
single_quoted_string: 'This is a single quoted string'
# Strings can be double quoted (allows for escape sequences)
double_quoted_string: "This is a double quoted string with an escaped newline \n and tab \t."
# Multiline string with folded style (newlines become spaces)
folded_style: >
This is a very
long string that
spans several lines.
# Multiline string with literal block style (newlines are preserved)
literal_style: |
Line 1
Line 2
Line 3
# Integers
age: 30
# Floating point
price: 19.99
# Boolean values
is_student: true
is_employed: false
# Null value
middle_name: ~ # null can also be represented with 'null'
# Timestamp
timestamp: 2023-01-01T12:00:00Z
# Sequences (lists)
# Inline list
fruits: [apple, banana, cherry]
# Block style list
books:
- 'Moby Dick'
- '1984'
- 'Pride and Prejudice'
# Mappings (dictionaries)
# Inline dictionary
address: {street: '123 Main St', city: 'Anytown', zip: '12345'}
# Block style dictionary
employee:
first_name: Jane
last_name: Smith
age: 40
# Edge cases and advanced features
# Strings that look like other types must be quoted
looks_like_number: '12345'
looks_like_boolean: 'yes'
looks_like_null: 'null'
looks_like_timestamp: '2023-01-01T12:00:00Z'
# Using anchors and aliases to reuse properties
base_address: &base
street: '456 Elm St'
city: 'Sometown'
office_address:
<<: *base # Merge the base address into this mapping
suite: 500
# Nested structures
users:
- name: Alice
age: 28
interests:
- reading
- cycling
- name: Bob
age: 35
interests:
- cooking
- hiking

View File

@ -0,0 +1,49 @@
# Scalars with special characters
# Commas and square brackets in strings should be enclosed in quotes
special_chars: "[This, string] has, special chars."
# Strings starting with reserved indicators must be quoted
reserved_string: "@not_a_directive"
colon_string: "this: looks like a mapping, but it's not"
# Explicit data type declaration
explicit_string: !!str 12345
# Sequences with nested mappings and lists
teams:
-
name: Team A
members:
- Alice
- Bob
tasks:
- task1:
status: incomplete
due_date: 2023-01-15
- task2:
status: complete
# Complex mapping keys
? |
Multi-line key
which is unusual but valid
: multi-line key's value
"complex key: with colon": complex key's value
# Set (a mapping with null values)
set_example:
item1: ~
item2: ~
# Merge multiple mappings (with override)
base_colors: &colors
red: "#FF0000"
blue: "#0000FF"
green: "#00FF00"
theme:
<<: *colors
blue: "#001122" # Overriding the blue color from base_colors

View File

@ -0,0 +1,19 @@
package yamlloader
import (
"io"
"github.com/databricks/cli/libs/config"
"gopkg.in/yaml.v3"
)
// LoadYAML reads a single YAML document from r and converts it into a
// config.Value tree. Every value in the tree records path as the file of
// its location.
//
// Input that contains no document at all (for example an empty file)
// yields config.NilValue and a nil error. Any other decoding or
// conversion failure is returned as an error.
func LoadYAML(path string, r io.Reader) (config.Value, error) {
	var node yaml.Node
	dec := yaml.NewDecoder(r)
	err := dec.Decode(&node)
	if err != nil {
		// The decoder reports io.EOF when there is no document to decode.
		// That is an empty configuration, not a failure.
		if err == io.EOF {
			return config.NilValue, nil
		}
		return config.NilValue, err
	}

	return newLoader(path).load(&node)
}

View File

@ -0,0 +1,117 @@
package yamlloader_test
import (
"testing"
"github.com/databricks/cli/libs/config"
"github.com/stretchr/testify/assert"
)
// TestYAMLAnchor01 loads testdata/anchor_01.yml and checks that only the
// anchored mapping is flagged as an anchor, and that a key defined next to
// a merge key reports the location of its own definition.
func TestYAMLAnchor01(t *testing.T) {
file := "testdata/anchor_01.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
assert.True(t, self.Get("defaults").IsAnchor())
assert.False(t, self.Get("shirt1").IsAnchor())
assert.False(t, self.Get("shirt2").IsAnchor())
pattern := self.Get("shirt1").Get("pattern")
assert.Equal(t, "striped", pattern.AsAny())
assert.Equal(t, config.Location{File: file, Line: 8, Column: 12}, pattern.Location())
}
// TestYAMLAnchor02 loads testdata/anchor_02.yml, which merges a sequence of
// aliases into one mapping, and checks the value and location of two merged
// keys and of one key defined directly in the mapping.
func TestYAMLAnchor02(t *testing.T) {
file := "testdata/anchor_02.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
color := self.Get("shirt").Get("color")
assert.Equal(t, "red", color.AsAny())
assert.Equal(t, config.Location{File: file, Line: 4, Column: 10}, color.Location())
primary := self.Get("shirt").Get("primary")
assert.Equal(t, "cotton", primary.AsAny())
assert.Equal(t, config.Location{File: file, Line: 8, Column: 12}, primary.Location())
pattern := self.Get("shirt").Get("pattern")
assert.Equal(t, "striped", pattern.AsAny())
assert.Equal(t, config.Location{File: file, Line: 13, Column: 12}, pattern.Location())
}
// TestYAMLAnchor03 loads testdata/anchor_03.yml and checks that a key
// defined directly in a mapping overrides the same key from a merged
// anchor, in both value and location.
func TestYAMLAnchor03(t *testing.T) {
file := "testdata/anchor_03.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
// Assert the override took place.
blue := self.Get("shirt").Get("color")
assert.Equal(t, "blue", blue.AsAny())
assert.Equal(t, file, blue.Location().File)
assert.Equal(t, 10, blue.Location().Line)
assert.Equal(t, 10, blue.Location().Column)
}
// TestYAMLAnchor04 loads testdata/anchor_04.yml and checks the value and
// location of a nested key reached through an alias (person1) and of the
// same key overridden next to a merge key (person2).
func TestYAMLAnchor04(t *testing.T) {
file := "testdata/anchor_04.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
p1 := self.Get("person1").Get("address").Get("city")
assert.Equal(t, "San Francisco", p1.AsAny())
assert.Equal(t, config.Location{File: file, Line: 4, Column: 9}, p1.Location())
p2 := self.Get("person2").Get("address").Get("city")
assert.Equal(t, "Los Angeles", p2.AsAny())
assert.Equal(t, config.Location{File: file, Line: 16, Column: 11}, p2.Location())
}
// TestYAMLAnchor05 loads testdata/anchor_05.yml and checks the value and
// location of the elements of a sequence that is referenced via an alias.
func TestYAMLAnchor05(t *testing.T) {
file := "testdata/anchor_05.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
features := self.Get("phone1").Get("features")
assert.Equal(t, "wifi", features.Index(0).AsAny())
assert.Equal(t, config.Location{File: file, Line: 4, Column: 5}, features.Index(0).Location())
assert.Equal(t, "bluetooth", features.Index(1).AsAny())
assert.Equal(t, config.Location{File: file, Line: 5, Column: 5}, features.Index(1).Location())
}
// TestYAMLAnchor06 loads testdata/anchor_06.yml and checks the value and
// location of a string that is referenced via an alias.
func TestYAMLAnchor06(t *testing.T) {
file := "testdata/anchor_06.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
greeting := self.Get("greeting1")
assert.Equal(t, "Hello, World!", greeting.AsAny())
assert.Equal(t, config.Location{File: file, Line: 2, Column: 16}, greeting.Location())
}
// TestYAMLAnchor07 loads testdata/anchor_07.yml and checks the value and
// location of a plain string key and of a number referenced via an alias.
func TestYAMLAnchor07(t *testing.T) {
file := "testdata/anchor_07.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
name := self.Get("person1").Get("name")
assert.Equal(t, "Alice", name.AsAny())
assert.Equal(t, config.Location{File: file, Line: 5, Column: 9}, name.Location())
age := self.Get("person1").Get("age")
assert.Equal(t, 25, age.AsAny())
assert.Equal(t, config.Location{File: file, Line: 2, Column: 13}, age.Location())
}
// TestYAMLAnchor08 loads testdata/anchor_08.yml and checks the value and
// location of a plain string key and of a boolean referenced via an alias.
func TestYAMLAnchor08(t *testing.T) {
file := "testdata/anchor_08.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
username := self.Get("user1").Get("username")
assert.Equal(t, "user1", username.AsAny())
assert.Equal(t, config.Location{File: file, Line: 5, Column: 13}, username.Location())
active := self.Get("user1").Get("active")
assert.Equal(t, true, active.AsAny())
assert.Equal(t, config.Location{File: file, Line: 2, Column: 11}, active.Location())
}

View File

@ -0,0 +1,36 @@
package yamlloader_test
import (
"bytes"
"os"
"testing"
"github.com/databricks/cli/libs/config/yamlloader"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)
// TestYAMLErrorMapMerge checks that merging a non-map value into a mapping
// is rejected with the same message by both the upstream YAML loader
// ("reference") and this package's loader ("self").
func TestYAMLErrorMapMerge(t *testing.T) {
	for _, file := range []string{
		"testdata/error_01.yml",
		"testdata/error_02.yml",
		"testdata/error_03.yml",
	} {
		input, err := os.ReadFile(file)
		require.NoError(t, err)

		t.Run(file, func(t *testing.T) {
			t.Run("reference", func(t *testing.T) {
				var ref any
				// Declare a local err; assigning to the one captured from
				// the enclosing loop would share state between subtests.
				err := yaml.Unmarshal(input, &ref)
				assert.ErrorContains(t, err, "map merge requires map or sequence of maps as the value")
			})

			t.Run("self", func(t *testing.T) {
				_, err := yamlloader.LoadYAML(file, bytes.NewBuffer(input))
				assert.ErrorContains(t, err, "map merge requires map or sequence of maps as the value")
			})
		})
	}
}

View File

@ -0,0 +1,26 @@
package yamlloader_test
import (
"testing"
"github.com/databricks/cli/libs/config"
"github.com/stretchr/testify/assert"
)
// TestYAMLMix01 loads testdata/mix_01.yml and checks that the anchored
// mapping is flagged as an anchor while the mapping that merges it is not.
func TestYAMLMix01(t *testing.T) {
file := "testdata/mix_01.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
assert.True(t, self.Get("base_address").IsAnchor())
assert.False(t, self.Get("office_address").IsAnchor())
}
// TestYAMLMix02 loads testdata/mix_02.yml and checks that the anchored
// mapping is flagged as an anchor while the mapping that merges it is not.
func TestYAMLMix02(t *testing.T) {
file := "testdata/mix_02.yml"
self := loadYAML(t, file)
assert.NotEqual(t, config.NilValue, self)
assert.True(t, self.Get("base_colors").IsAnchor())
assert.False(t, self.Get("theme").IsAnchor())
}

View File

@ -0,0 +1,30 @@
package yamlloader_test
import (
"bytes"
"os"
"testing"
"github.com/databricks/cli/libs/config"
"github.com/databricks/cli/libs/config/yamlloader"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)
// loadYAML is a test helper that reads the file at path and loads it twice:
// once with the upstream YAML loader into an untyped value, and once with
// this package's loader. It fails the test if either load fails, asserts
// that both produce equal untyped output, and returns the value produced by
// this package's loader.
func loadYAML(t *testing.T, path string) config.Value {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	input, err := os.ReadFile(path)
	require.NoError(t, err)

	var ref any
	err = yaml.Unmarshal(input, &ref)
	require.NoError(t, err)

	self, err := yamlloader.LoadYAML(path, bytes.NewBuffer(input))
	require.NoError(t, err)
	assert.NotNil(t, self)

	// Deep-equal the two values to ensure that the loader is producing
	// the same untyped output as the upstream YAML loader.
	assert.EqualValues(t, ref, self.AsAny())
	return self
}