Retain location annotation when expanding globs for pipeline libraries (#1274)

## Changes

Location metadata is now associated with every configuration value.
When expanding globs for pipeline libraries, this annotation was erased
because the mutator converted each value to and from the typed structure.
This change modifies the expansion mutator to work with `dyn.Value` directly,
so every expanded entry retains the location of the value that holds the glob pattern.

## Tests

Unit tests pass.
This commit is contained in:
Pieter Noordhuis 2024-03-11 22:59:36 +01:00 committed by GitHub
parent a44c52a399
commit 4a9a12af19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 111 additions and 55 deletions

View File

@ -7,7 +7,7 @@ import (
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/databricks-sdk-go/service/pipelines"
"github.com/databricks/cli/libs/dyn"
)
type expandPipelineGlobPaths struct{}
@ -16,77 +16,94 @@ func ExpandPipelineGlobPaths() bundle.Mutator {
return &expandPipelineGlobPaths{}
}
func (m *expandPipelineGlobPaths) Apply(_ context.Context, b *bundle.Bundle) error {
for key, pipeline := range b.Config.Resources.Pipelines {
dir, err := pipeline.ConfigFileDirectory()
func (m *expandPipelineGlobPaths) expandLibrary(v dyn.Value) ([]dyn.Value, error) {
// Probe for the path field in the library.
for _, p := range []dyn.Path{
dyn.NewPath(dyn.Key("notebook"), dyn.Key("path")),
dyn.NewPath(dyn.Key("file"), dyn.Key("path")),
} {
pv, err := dyn.GetByPath(v, p)
if dyn.IsNoSuchKeyError(err) {
continue
}
if err != nil {
return fmt.Errorf("unable to determine directory for pipeline %s: %w", key, err)
return nil, err
}
expandedLibraries := make([]pipelines.PipelineLibrary, 0)
for i := 0; i < len(pipeline.Libraries); i++ {
library := &pipeline.Libraries[i]
path := getGlobPatternToExpand(library)
// If the path is empty or not a local path, return the original value.
path := pv.MustString()
if path == "" || !libraries.IsLocalPath(path) {
expandedLibraries = append(expandedLibraries, *library)
continue
return []dyn.Value{v}, nil
}
dir, err := v.Location().Directory()
if err != nil {
return nil, err
}
matches, err := filepath.Glob(filepath.Join(dir, path))
if err != nil {
return err
return nil, err
}
// If there are no matches, return the original value.
if len(matches) == 0 {
expandedLibraries = append(expandedLibraries, *library)
continue
return []dyn.Value{v}, nil
}
// Emit a new value for each match.
var ev []dyn.Value
for _, match := range matches {
m, err := filepath.Rel(dir, match)
if err != nil {
return err
return nil, err
}
expandedLibraries = append(expandedLibraries, cloneWithPath(library, m))
nv, err := dyn.SetByPath(v, p, dyn.NewValue(m, pv.Location()))
if err != nil {
return nil, err
}
}
pipeline.Libraries = expandedLibraries
ev = append(ev, nv)
}
return nil
return ev, nil
}
func getGlobPatternToExpand(library *pipelines.PipelineLibrary) string {
if library.File != nil {
return library.File.Path
// Neither of the library paths were found. This is likely an invalid node,
// but it isn't this mutator's job to enforce that. Return the original value.
return []dyn.Value{v}, nil
}
if library.Notebook != nil {
return library.Notebook.Path
func (m *expandPipelineGlobPaths) expandSequence(p dyn.Path, v dyn.Value) (dyn.Value, error) {
s, ok := v.AsSequence()
if !ok {
return dyn.InvalidValue, fmt.Errorf("expected sequence, got %s", v.Kind())
}
return ""
var vs []dyn.Value
for _, sv := range s {
v, err := m.expandLibrary(sv)
if err != nil {
return dyn.InvalidValue, err
}
func cloneWithPath(library *pipelines.PipelineLibrary, path string) pipelines.PipelineLibrary {
if library.File != nil {
return pipelines.PipelineLibrary{
File: &pipelines.FileLibrary{
Path: path,
},
}
vs = append(vs, v...)
}
if library.Notebook != nil {
return pipelines.PipelineLibrary{
Notebook: &pipelines.NotebookLibrary{
Path: path,
},
}
return dyn.NewValue(vs, v.Location()), nil
}
return pipelines.PipelineLibrary{}
// Apply rewrites the "libraries" field of every pipeline found under
// resources.pipelines by passing it through expandSequence. It returns
// whatever error the configuration mutation reports.
func (m *expandPipelineGlobPaths) Apply(_ context.Context, b *bundle.Bundle) error {
	expandAll := func(root dyn.Value) (dyn.Value, error) {
		// Matches resources.pipelines.<any key>.libraries.
		librariesPattern := dyn.NewPattern(
			dyn.Key("resources"),
			dyn.Key("pipelines"),
			dyn.AnyKey(),
			dyn.Key("libraries"),
		)

		// Every match is replaced by the value expandSequence returns for it.
		return dyn.MapByPattern(root, librariesPattern, m.expandSequence)
	}

	return b.Config.Mutate(expandAll)
}
func (*expandPipelineGlobPaths) Name() string {

View File

@ -35,6 +35,10 @@ func TestExpandGlobPathsInPipelines(t *testing.T) {
touchEmptyFile(t, filepath.Join(dir, "test1.py"))
touchEmptyFile(t, filepath.Join(dir, "test/test2.py"))
touchEmptyFile(t, filepath.Join(dir, "test/test3.py"))
touchEmptyFile(t, filepath.Join(dir, "relative/test4.py"))
touchEmptyFile(t, filepath.Join(dir, "relative/test5.py"))
touchEmptyFile(t, filepath.Join(dir, "skip/test6.py"))
touchEmptyFile(t, filepath.Join(dir, "skip/test7.py"))
b := &bundle.Bundle{
Config: config.Root{
@ -54,7 +58,13 @@ func TestExpandGlobPathsInPipelines(t *testing.T) {
},
{
File: &pipelines.FileLibrary{
Path: "./**/*.py",
Path: "./test/*.py",
},
},
{
// This value is annotated to be defined in the "./relative" directory.
File: &pipelines.FileLibrary{
Path: "./*.py",
},
},
{
@ -96,13 +106,14 @@ func TestExpandGlobPathsInPipelines(t *testing.T) {
}
bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml"))
bundletest.SetLocation(b, "resources.pipelines.pipeline.libraries[3]", filepath.Join(dir, "relative", "resource.yml"))
m := ExpandPipelineGlobPaths()
err := bundle.Apply(context.Background(), b, m)
require.NoError(t, err)
libraries := b.Config.Resources.Pipelines["pipeline"].Libraries
require.Len(t, libraries, 11)
require.Len(t, libraries, 13)
// Making sure glob patterns are expanded correctly
require.True(t, containsNotebook(libraries, filepath.Join("test", "test2.ipynb")))
@ -110,6 +121,10 @@ func TestExpandGlobPathsInPipelines(t *testing.T) {
require.True(t, containsFile(libraries, filepath.Join("test", "test2.py")))
require.True(t, containsFile(libraries, filepath.Join("test", "test3.py")))
// These patterns are defined relative to "./relative"
require.True(t, containsFile(libraries, "test4.py"))
require.True(t, containsFile(libraries, "test5.py"))
// Making sure exact file references work as well
require.True(t, containsNotebook(libraries, "test1.ipynb"))

View File

@ -1,6 +1,9 @@
package dyn
import "fmt"
import (
"fmt"
"path/filepath"
)
type Location struct {
File string
@ -11,3 +14,11 @@ type Location struct {
// String formats the location as "file:line:column".
func (l Location) String() string {
	file, line, column := l.File, l.Line, l.Column
	return fmt.Sprintf("%s:%d:%d", file, line, column)
}
// Directory returns the directory portion of the location's file path,
// as computed by filepath.Dir. It returns an error when the location
// has no file set.
func (l Location) Directory() (string, error) {
	if file := l.File; file != "" {
		return filepath.Dir(file), nil
	}
	return "", fmt.Errorf("no file in location")
}

View File

@ -11,3 +11,16 @@ func TestLocation(t *testing.T) {
loc := dyn.Location{File: "file", Line: 1, Column: 2}
assert.Equal(t, "file:1:2", loc.String())
}
// TestLocationDirectory checks that a location whose file has no directory
// component reports "." as its directory.
func TestLocationDirectory(t *testing.T) {
	got, err := dyn.Location{File: "file", Line: 1, Column: 2}.Directory()
	assert.NoError(t, err)
	assert.Equal(t, ".", got)
}
// TestLocationDirectoryNoFile checks that Directory returns an error for a
// zero-value location, which has no file.
func TestLocationDirectoryNoFile(t *testing.T) {
	var empty dyn.Location
	_, err := empty.Directory()
	assert.Error(t, err)
}