mirror of https://github.com/databricks/cli.git
PythonMutator: propagate source locations (#1783)
## Changes

Add a mechanism to load Python source locations in the Python mutator. Previously, locations pointed to generated YAML. Now, they point to Python sources instead.

The Python process outputs "locations.json" containing the locations of bundle paths, for example:

```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```

Such locations form a tree, and we assign the location of the closest ancestor to each `dyn.Value` based on its path. For example, `resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5` and `resources.jobs.job_0.tasks[0].email_notifications` is located at `job_0.py:3:5`, because we use the location of the job as the most precise approximation.

This feature is only enabled if `experimental/python` is used.

Note: for now, we don't update the locations of values that contain relative paths, because doing so has the side effect of changing how these paths are resolved.

## Example

```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
  at resources.jobs.examples.tasks[0].job_cluster_key
  in resources/example.py:10:1
```

## Tests

Unit tests and manual testing.
This commit is contained in:
parent
54a470837c
commit
3d91691f25
|
@ -9,6 +9,7 @@ import (
|
||||||
"github.com/databricks/cli/libs/dyn"
|
"github.com/databricks/cli/libs/dyn"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// pythonDiagnostic is a single entry in diagnostics.json
|
||||||
type pythonDiagnostic struct {
|
type pythonDiagnostic struct {
|
||||||
Severity pythonSeverity `json:"severity"`
|
Severity pythonSeverity `json:"severity"`
|
||||||
Summary string `json:"summary"`
|
Summary string `json:"summary"`
|
||||||
|
|
|
@ -0,0 +1,194 @@
|
||||||
|
package python
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
)
|
||||||
|
|
||||||
|
// generatedFileName is used as the virtual file name for YAML generated by Python code.
|
||||||
|
//
|
||||||
|
// mergePythonLocations drops every dyn.Location whose file base name is generatedFileName
|
||||||
|
// and puts the location loaded from locations.json in front of the remaining ones.
|
||||||
|
const generatedFileName = "__generated_by_python__.yml"
|
||||||
|
|
||||||
|
// pythonLocations is a data structure for efficient location lookup for a given path
|
||||||
|
//
|
||||||
|
// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path.
|
||||||
|
// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n)
|
||||||
|
// where n is the number of components in the path.
|
||||||
|
//
|
||||||
|
// For example, with locations.json:
|
||||||
|
//
|
||||||
|
// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
|
||||||
|
// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
|
||||||
|
// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
|
||||||
|
//
|
||||||
|
// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5
|
||||||
|
//
|
||||||
|
// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5,
|
||||||
|
// because we use the location of the job as the most precise approximation.
|
||||||
|
//
|
||||||
|
// See pythonLocationEntry for the structure of a single entry in locations.json
|
||||||
|
type pythonLocations struct {
|
||||||
|
// descendants referenced by key, e.g. '.foo'
|
||||||
|
keys map[string]*pythonLocations
|
||||||
|
|
||||||
|
// descendants referenced by index, e.g. '[0]'
|
||||||
|
indexes map[int]*pythonLocations
|
||||||
|
|
||||||
|
// location for the current node if it exists
|
||||||
|
location dyn.Location
|
||||||
|
|
||||||
|
// if true, location is present; if false, location holds the zero value and must be ignored
|
||||||
|
exists bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// pythonLocationEntry is a single entry in locations.json
//
// parsePythonLocations decodes one entry per JSON object, parses Path into a dyn.Path
// and copies File, Line and Column into a dyn.Location.
|
||||||
|
type pythonLocationEntry struct {
|
||||||
|
Path string `json:"path"` // bundle path, e.g. "resources.jobs.job_0.tasks[0].task_key"
|
||||||
|
File string `json:"file"` // source file, copied as-is into dyn.Location.File
|
||||||
|
Line int `json:"line"` // copied as-is into dyn.Location.Line
|
||||||
|
Column int `json:"column"` // copied as-is into dyn.Location.Column
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergePythonLocations applies locations from Python mutator into given dyn.Value
|
||||||
|
//
|
||||||
|
// The primary use-case is to merge locations.json with output.json, so that any
|
||||||
|
// validation errors will point to Python source code instead of generated YAML.
|
||||||
|
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
|
||||||
|
return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) {
|
||||||
|
newLocation, ok := findPythonLocation(locations, path)
|
||||||
|
if !ok {
|
||||||
|
return value, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// The first item in the list is the "last" location used for error reporting
|
||||||
|
//
|
||||||
|
// Loaded YAML uses virtual file path as location, we remove any of such references,
|
||||||
|
// because they should use 'newLocation' instead.
|
||||||
|
//
|
||||||
|
// We preserve any previous non-virtual locations in case when Python function modified
|
||||||
|
// resource defined in YAML.
|
||||||
|
newLocations := append(
|
||||||
|
[]dyn.Location{newLocation},
|
||||||
|
removeVirtualLocations(value.Locations())...,
|
||||||
|
)
|
||||||
|
|
||||||
|
return value.WithLocations(newLocations), nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func removeVirtualLocations(locations []dyn.Location) []dyn.Location {
|
||||||
|
var newLocations []dyn.Location
|
||||||
|
|
||||||
|
for _, location := range locations {
|
||||||
|
if filepath.Base(location.File) == generatedFileName {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
newLocations = append(newLocations, location)
|
||||||
|
}
|
||||||
|
|
||||||
|
return newLocations
|
||||||
|
}
|
||||||
|
|
||||||
|
// parsePythonLocations parses locations.json from the Python mutator.
|
||||||
|
//
|
||||||
|
// locations file is newline-separated JSON objects with pythonLocationEntry structure.
|
||||||
|
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
|
||||||
|
decoder := json.NewDecoder(input)
|
||||||
|
locations := newPythonLocations()
|
||||||
|
|
||||||
|
for decoder.More() {
|
||||||
|
var entry pythonLocationEntry
|
||||||
|
|
||||||
|
err := decoder.Decode(&entry)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse python location: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
path, err := dyn.NewPathFromString(entry.Path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse python location: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
location := dyn.Location{
|
||||||
|
File: entry.File,
|
||||||
|
Line: entry.Line,
|
||||||
|
Column: entry.Column,
|
||||||
|
}
|
||||||
|
|
||||||
|
putPythonLocation(locations, path, location)
|
||||||
|
}
|
||||||
|
|
||||||
|
return locations, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// putPythonLocation puts the location to the trie for the given path
|
||||||
|
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
|
||||||
|
currentNode := trie
|
||||||
|
|
||||||
|
for _, component := range path {
|
||||||
|
if key := component.Key(); key != "" {
|
||||||
|
if _, ok := currentNode.keys[key]; !ok {
|
||||||
|
currentNode.keys[key] = newPythonLocations()
|
||||||
|
}
|
||||||
|
|
||||||
|
currentNode = currentNode.keys[key]
|
||||||
|
} else {
|
||||||
|
index := component.Index()
|
||||||
|
if _, ok := currentNode.indexes[index]; !ok {
|
||||||
|
currentNode.indexes[index] = newPythonLocations()
|
||||||
|
}
|
||||||
|
|
||||||
|
currentNode = currentNode.indexes[index]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
currentNode.location = location
|
||||||
|
currentNode.exists = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// newPythonLocations creates a new trie node
|
||||||
|
func newPythonLocations() *pythonLocations {
|
||||||
|
return &pythonLocations{
|
||||||
|
keys: make(map[string]*pythonLocations),
|
||||||
|
indexes: make(map[int]*pythonLocations),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// findPythonLocation finds the location or closest ancestor location in the trie for the given path
|
||||||
|
// if no ancestor or exact location is found, false is returned.
|
||||||
|
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
|
||||||
|
currentNode := locations
|
||||||
|
lastLocation := locations.location
|
||||||
|
exists := locations.exists
|
||||||
|
|
||||||
|
for _, component := range path {
|
||||||
|
if key := component.Key(); key != "" {
|
||||||
|
if _, ok := currentNode.keys[key]; !ok {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
currentNode = currentNode.keys[key]
|
||||||
|
} else {
|
||||||
|
index := component.Index()
|
||||||
|
if _, ok := currentNode.indexes[index]; !ok {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
currentNode = currentNode.indexes[index]
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentNode.exists {
|
||||||
|
lastLocation = currentNode.location
|
||||||
|
exists = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return lastLocation, exists
|
||||||
|
}
|
|
@ -0,0 +1,179 @@
|
||||||
|
package python
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/libs/diag"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
assert "github.com/databricks/cli/libs/dyn/dynassert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMergeLocations(t *testing.T) {
|
||||||
|
pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
|
||||||
|
generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1}
|
||||||
|
yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1}
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation)
|
||||||
|
|
||||||
|
input := dyn.NewValue(
|
||||||
|
map[string]dyn.Value{
|
||||||
|
"foo": dyn.NewValue(
|
||||||
|
map[string]dyn.Value{
|
||||||
|
"baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}),
|
||||||
|
"qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}),
|
||||||
|
},
|
||||||
|
[]dyn.Location{},
|
||||||
|
),
|
||||||
|
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
|
||||||
|
},
|
||||||
|
[]dyn.Location{yamlLocation},
|
||||||
|
)
|
||||||
|
|
||||||
|
expected := dyn.NewValue(
|
||||||
|
map[string]dyn.Value{
|
||||||
|
"foo": dyn.NewValue(
|
||||||
|
map[string]dyn.Value{
|
||||||
|
// pythonLocation is appended to the beginning of the list if absent
|
||||||
|
"baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
|
||||||
|
// generatedLocation is replaced by pythonLocation
|
||||||
|
"qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
|
||||||
|
},
|
||||||
|
[]dyn.Location{pythonLocation},
|
||||||
|
),
|
||||||
|
// if location is unknown, we keep it as-is
|
||||||
|
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
|
||||||
|
},
|
||||||
|
[]dyn.Location{yamlLocation},
|
||||||
|
)
|
||||||
|
|
||||||
|
actual, err := mergePythonLocations(input, locations)
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, expected, actual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindLocation(t *testing.T) {
|
||||||
|
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
|
||||||
|
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
|
||||||
|
|
||||||
|
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar"))
|
||||||
|
|
||||||
|
assert.True(t, exists)
|
||||||
|
assert.Equal(t, location1, actual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindLocation_indexPathComponent(t *testing.T) {
|
||||||
|
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
|
||||||
|
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
|
||||||
|
location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1}
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2)
|
||||||
|
|
||||||
|
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"))
|
||||||
|
|
||||||
|
assert.True(t, exists)
|
||||||
|
assert.Equal(t, location2, actual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindLocation_closestAncestorLocation(t *testing.T) {
|
||||||
|
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
|
||||||
|
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
|
||||||
|
|
||||||
|
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz"))
|
||||||
|
|
||||||
|
assert.True(t, exists)
|
||||||
|
assert.Equal(t, location1, actual)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindLocation_unknownLocation(t *testing.T) {
|
||||||
|
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
|
||||||
|
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
|
||||||
|
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
|
||||||
|
|
||||||
|
_, exists := findPythonLocation(locations, dyn.MustPathFromString("bar"))
|
||||||
|
|
||||||
|
assert.False(t, exists)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadOutput(t *testing.T) {
|
||||||
|
location := dyn.Location{File: "my_job.py", Line: 1, Column: 1}
|
||||||
|
bundleRoot := t.TempDir()
|
||||||
|
output := `{
|
||||||
|
"resources": {
|
||||||
|
"jobs": {
|
||||||
|
"my_job": {
|
||||||
|
"name": "my_job",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"task_key": "my_task",
|
||||||
|
"notebook_task": {
|
||||||
|
"notebook_path": "my_notebook"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}`
|
||||||
|
|
||||||
|
locations := newPythonLocations()
|
||||||
|
putPythonLocation(
|
||||||
|
locations,
|
||||||
|
dyn.MustPathFromString("resources.jobs.my_job"),
|
||||||
|
location,
|
||||||
|
)
|
||||||
|
|
||||||
|
value, diags := loadOutput(
|
||||||
|
bundleRoot,
|
||||||
|
bytes.NewReader([]byte(output)),
|
||||||
|
locations,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert.Equal(t, diag.Diagnostics{}, diags)
|
||||||
|
|
||||||
|
name, err := dyn.Get(value, "resources.jobs.my_job.name")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, []dyn.Location{location}, name.Locations())
|
||||||
|
|
||||||
|
// until we implement path normalization, we have to keep locations of values
|
||||||
|
// that change semantic depending on their location
|
||||||
|
//
|
||||||
|
// note: it's important to have absolute path including 'bundleRoot'
|
||||||
|
// because mutator pipeline already has expanded locations into absolute path
|
||||||
|
notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, notebookPath.Locations(), 1)
|
||||||
|
require.Equal(t, filepath.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParsePythonLocations(t *testing.T) {
|
||||||
|
expected := dyn.Location{File: "foo.py", Line: 1, Column: 2}
|
||||||
|
|
||||||
|
input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}`
|
||||||
|
reader := bytes.NewReader([]byte(input))
|
||||||
|
locations, err := parsePythonLocations(reader)
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.True(t, locations.keys["foo"].exists)
|
||||||
|
assert.Equal(t, expected, locations.keys["foo"].location)
|
||||||
|
}
|
|
@ -7,11 +7,14 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"io/fs"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle/config/mutator/paths"
|
||||||
|
|
||||||
"github.com/databricks/databricks-sdk-go/logger"
|
"github.com/databricks/databricks-sdk-go/logger"
|
||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
|
|
||||||
|
@ -124,6 +127,15 @@ type opts struct {
|
||||||
enabled bool
|
enabled bool
|
||||||
|
|
||||||
venvPath string
|
venvPath string
|
||||||
|
|
||||||
|
loadLocations bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type runPythonMutatorOpts struct {
|
||||||
|
cacheDir string
|
||||||
|
bundleRootPath string
|
||||||
|
pythonPath string
|
||||||
|
loadLocations bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// getOpts adapts deprecated PyDABs and upcoming Python configuration
|
// getOpts adapts deprecated PyDABs and upcoming Python configuration
|
||||||
|
@ -150,6 +162,7 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) {
|
||||||
return opts{
|
return opts{
|
||||||
enabled: true,
|
enabled: true,
|
||||||
venvPath: experimental.PyDABs.VEnvPath,
|
venvPath: experimental.PyDABs.VEnvPath,
|
||||||
|
loadLocations: false, // not supported in PyDABs
|
||||||
}, nil
|
}, nil
|
||||||
} else {
|
} else {
|
||||||
return opts{}, nil
|
return opts{}, nil
|
||||||
|
@ -160,6 +173,7 @@ func getOpts(b *bundle.Bundle, phase phase) (opts, error) {
|
||||||
return opts{
|
return opts{
|
||||||
enabled: true,
|
enabled: true,
|
||||||
venvPath: experimental.Python.VEnvPath,
|
venvPath: experimental.Python.VEnvPath,
|
||||||
|
loadLocations: true,
|
||||||
}, nil
|
}, nil
|
||||||
} else {
|
} else {
|
||||||
return opts{}, nil
|
return opts{}, nil
|
||||||
|
@ -194,7 +208,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
|
||||||
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
|
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot)
|
rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
|
||||||
|
cacheDir: cacheDir,
|
||||||
|
bundleRootPath: b.BundleRootPath,
|
||||||
|
pythonPath: pythonPath,
|
||||||
|
loadLocations: opts.loadLocations,
|
||||||
|
})
|
||||||
mutateDiags = diags
|
mutateDiags = diags
|
||||||
if diags.HasError() {
|
if diags.HasError() {
|
||||||
return dyn.InvalidValue, mutateDiagsHasError
|
return dyn.InvalidValue, mutateDiagsHasError
|
||||||
|
@ -238,13 +257,14 @@ func createCacheDir(ctx context.Context) (string, error) {
|
||||||
return os.MkdirTemp("", "-python")
|
return os.MkdirTemp("", "-python")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
|
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {
|
||||||
inputPath := filepath.Join(cacheDir, "input.json")
|
inputPath := filepath.Join(opts.cacheDir, "input.json")
|
||||||
outputPath := filepath.Join(cacheDir, "output.json")
|
outputPath := filepath.Join(opts.cacheDir, "output.json")
|
||||||
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
|
diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json")
|
||||||
|
locationsPath := filepath.Join(opts.cacheDir, "locations.json")
|
||||||
|
|
||||||
args := []string{
|
args := []string{
|
||||||
pythonPath,
|
opts.pythonPath,
|
||||||
"-m",
|
"-m",
|
||||||
"databricks.bundles.build",
|
"databricks.bundles.build",
|
||||||
"--phase",
|
"--phase",
|
||||||
|
@ -257,6 +277,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
|
||||||
diagnosticsPath,
|
diagnosticsPath,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if opts.loadLocations {
|
||||||
|
args = append(args, "--locations", locationsPath)
|
||||||
|
}
|
||||||
|
|
||||||
if err := writeInputFile(inputPath, root); err != nil {
|
if err := writeInputFile(inputPath, root); err != nil {
|
||||||
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
|
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
|
||||||
}
|
}
|
||||||
|
@ -271,7 +295,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
|
||||||
_, processErr := process.Background(
|
_, processErr := process.Background(
|
||||||
ctx,
|
ctx,
|
||||||
args,
|
args,
|
||||||
process.WithDir(rootPath),
|
process.WithDir(opts.bundleRootPath),
|
||||||
process.WithStderrWriter(stderrWriter),
|
process.WithStderrWriter(stderrWriter),
|
||||||
process.WithStdoutWriter(stdoutWriter),
|
process.WithStdoutWriter(stdoutWriter),
|
||||||
)
|
)
|
||||||
|
@ -307,7 +331,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
|
||||||
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
|
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
output, outputDiags := loadOutputFile(rootPath, outputPath)
|
locations, err := loadLocationsFile(locationsPath)
|
||||||
|
if err != nil {
|
||||||
|
return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations)
|
||||||
pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)
|
pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)
|
||||||
|
|
||||||
// we pass through pythonDiagnostic because it contains warnings
|
// we pass through pythonDiagnostic because it contains warnings
|
||||||
|
@ -351,7 +380,21 @@ func writeInputFile(inputPath string, input dyn.Value) error {
|
||||||
return os.WriteFile(inputPath, rootConfigJson, 0o600)
|
return os.WriteFile(inputPath, rootConfigJson, 0o600)
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
|
// loadLocationsFile loads locations.json containing source locations for generated YAML.
|
||||||
|
func loadLocationsFile(locationsPath string) (*pythonLocations, error) {
|
||||||
|
locationsFile, err := os.Open(locationsPath)
|
||||||
|
if errors.Is(err, fs.ErrNotExist) {
|
||||||
|
return newPythonLocations(), nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open locations file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer locationsFile.Close()
|
||||||
|
|
||||||
|
return parsePythonLocations(locationsFile)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
|
||||||
outputFile, err := os.Open(outputPath)
|
outputFile, err := os.Open(outputPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
|
||||||
|
@ -359,15 +402,19 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
|
||||||
|
|
||||||
defer outputFile.Close()
|
defer outputFile.Close()
|
||||||
|
|
||||||
|
return loadOutput(rootPath, outputFile, locations)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
|
||||||
// we need absolute path because later parts of pipeline assume all paths are absolute
|
// we need absolute path because later parts of pipeline assume all paths are absolute
|
||||||
// and this file will be used as location to resolve relative paths.
|
// and this file will be used as location to resolve relative paths.
|
||||||
//
|
//
|
||||||
// virtualPath has to stay in rootPath, because locations outside root path are not allowed:
|
// virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed:
|
||||||
//
|
//
|
||||||
// Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path
|
// Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path
|
||||||
//
|
//
|
||||||
// for that, we pass virtualPath instead of outputPath as file location
|
// for that, we pass virtualPath instead of outputPath as file location
|
||||||
virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_python__.yml"))
|
virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
|
||||||
}
|
}
|
||||||
|
@ -377,7 +424,29 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
|
||||||
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
return strictNormalize(config.Root{}, generated)
|
// paths are resolved relative to locations of their values, if we change location
|
||||||
|
// we have to update each path, until we simplify that, we don't update locations
|
||||||
|
// for such values, so we don't change how paths are resolved
|
||||||
|
//
|
||||||
|
// we can remove this once we:
|
||||||
|
// - add variable interpolation before and after PythonMutator
|
||||||
|
// - implement path normalization (aka path normal form)
|
||||||
|
_, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
|
||||||
|
putPythonLocation(locations, p, v.Location())
|
||||||
|
return v, nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// generated has dyn.Location as if it comes from generated YAML file
|
||||||
|
// earlier we loaded locations.json with source locations in Python code
|
||||||
|
generatedWithLocations, err := mergePythonLocations(generated, locations)
|
||||||
|
if err != nil {
|
||||||
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
return strictNormalize(config.Root{}, generatedWithLocations)
|
||||||
}
|
}
|
||||||
|
|
||||||
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {
|
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {
|
||||||
|
|
|
@ -7,7 +7,6 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
|
||||||
"runtime"
|
"runtime"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -93,6 +92,8 @@ func TestPythonMutator_loadResources(t *testing.T) {
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
|
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
|
||||||
|
`{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5}
|
||||||
|
{"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`,
|
||||||
)
|
)
|
||||||
|
|
||||||
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
||||||
|
@ -110,6 +111,25 @@ func TestPythonMutator_loadResources(t *testing.T) {
|
||||||
assert.Equal(t, "job_1", job1.Name)
|
assert.Equal(t, "job_1", job1.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// output of locations.json should be applied to underlying dyn.Value
|
||||||
|
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
|
||||||
|
name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name"))
|
||||||
|
if err != nil {
|
||||||
|
return dyn.InvalidValue, err
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.Equal(t, []dyn.Location{
|
||||||
|
{
|
||||||
|
File: "src/examples/job1.py",
|
||||||
|
Line: 5,
|
||||||
|
Column: 7,
|
||||||
|
},
|
||||||
|
}, name1.Locations())
|
||||||
|
|
||||||
|
return v, nil
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, 1, len(diags))
|
assert.Equal(t, 1, len(diags))
|
||||||
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
|
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
|
||||||
assert.Equal(t, []dyn.Location{
|
assert.Equal(t, []dyn.Location{
|
||||||
|
@ -157,7 +177,7 @@ func TestPythonMutator_loadResources_disallowed(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}`, "")
|
}`, "", "")
|
||||||
|
|
||||||
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
||||||
diag := bundle.Apply(ctx, b, mutator)
|
diag := bundle.Apply(ctx, b, mutator)
|
||||||
|
@ -202,7 +222,7 @@ func TestPythonMutator_applyMutators(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}`, "")
|
}`, "", "")
|
||||||
|
|
||||||
mutator := PythonMutator(PythonMutatorPhaseApplyMutators)
|
mutator := PythonMutator(PythonMutatorPhaseApplyMutators)
|
||||||
diag := bundle.Apply(ctx, b, mutator)
|
diag := bundle.Apply(ctx, b, mutator)
|
||||||
|
@ -224,7 +244,7 @@ func TestPythonMutator_applyMutators(t *testing.T) {
|
||||||
description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description"))
|
description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description"))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
expectedVirtualPath, err := filepath.Abs("__generated_by_python__.yml")
|
expectedVirtualPath, err := filepath.Abs(generatedFileName)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, expectedVirtualPath, description.Location().File)
|
assert.Equal(t, expectedVirtualPath, description.Location().File)
|
||||||
|
|
||||||
|
@ -263,7 +283,7 @@ func TestPythonMutator_badOutput(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}`, "")
|
}`, "", "")
|
||||||
|
|
||||||
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
|
||||||
diag := bundle.Apply(ctx, b, mutator)
|
diag := bundle.Apply(ctx, b, mutator)
|
||||||
|
@ -312,7 +332,7 @@ func TestGetOps_Python(t *testing.T) {
|
||||||
}, PythonMutatorPhaseLoadResources)
|
}, PythonMutatorPhaseLoadResources)
|
||||||
|
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual)
|
assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: true}, actual)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetOps_PyDABs(t *testing.T) {
|
func TestGetOps_PyDABs(t *testing.T) {
|
||||||
|
@ -328,7 +348,7 @@ func TestGetOps_PyDABs(t *testing.T) {
|
||||||
}, PythonMutatorPhaseInit)
|
}, PythonMutatorPhaseInit)
|
||||||
|
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, opts{venvPath: ".venv", enabled: true}, actual)
|
assert.Equal(t, opts{venvPath: ".venv", enabled: true, loadLocations: false}, actual)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetOps_empty(t *testing.T) {
|
func TestGetOps_empty(t *testing.T) {
|
||||||
|
@ -661,7 +681,7 @@ or activate the environment before running CLI commands:
|
||||||
assert.Equal(t, expected, out)
|
assert.Equal(t, expected, out)
|
||||||
}
|
}
|
||||||
|
|
||||||
func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context {
|
func withProcessStub(t *testing.T, args []string, output, diagnostics, locations string) context.Context {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
ctx, stub := process.WithStub(ctx)
|
ctx, stub := process.WithStub(ctx)
|
||||||
|
|
||||||
|
@ -673,32 +693,51 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co
|
||||||
|
|
||||||
inputPath := filepath.Join(cacheDir, "input.json")
|
inputPath := filepath.Join(cacheDir, "input.json")
|
||||||
outputPath := filepath.Join(cacheDir, "output.json")
|
outputPath := filepath.Join(cacheDir, "output.json")
|
||||||
|
locationsPath := filepath.Join(cacheDir, "locations.json")
|
||||||
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
|
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
|
||||||
|
|
||||||
args = append(args, "--input", inputPath)
|
|
||||||
args = append(args, "--output", outputPath)
|
|
||||||
args = append(args, "--diagnostics", diagnosticsPath)
|
|
||||||
|
|
||||||
stub.WithCallback(func(actual *exec.Cmd) error {
|
stub.WithCallback(func(actual *exec.Cmd) error {
|
||||||
_, err := os.Stat(inputPath)
|
_, err := os.Stat(inputPath)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
if reflect.DeepEqual(actual.Args, args) {
|
actualInputPath := getArg(actual.Args, "--input")
|
||||||
err := os.WriteFile(outputPath, []byte(output), 0o600)
|
actualOutputPath := getArg(actual.Args, "--output")
|
||||||
|
actualDiagnosticsPath := getArg(actual.Args, "--diagnostics")
|
||||||
|
actualLocationsPath := getArg(actual.Args, "--locations")
|
||||||
|
|
||||||
|
require.Equal(t, inputPath, actualInputPath)
|
||||||
|
require.Equal(t, outputPath, actualOutputPath)
|
||||||
|
require.Equal(t, diagnosticsPath, actualDiagnosticsPath)
|
||||||
|
|
||||||
|
// locations is an optional argument
|
||||||
|
if locations != "" {
|
||||||
|
require.Equal(t, locationsPath, actualLocationsPath)
|
||||||
|
|
||||||
|
err = os.WriteFile(locationsPath, []byte(locations), 0o600)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = os.WriteFile(outputPath, []byte(output), 0o600)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
|
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
} else {
|
|
||||||
return fmt.Errorf("unexpected command: %v", actual.Args)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
|
|
||||||
return ctx
|
return ctx
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getArg returns the value that immediately follows the first occurrence of
// the flag name in args (e.g. the path after "--output").
//
// It returns "" when name is not present, or when name is the last element
// and therefore has no value after it. The scan stops one element before the
// end so that a dangling flag can never index past the slice.
func getArg(args []string, name string) string {
	for i := 0; i+1 < len(args); i++ {
		if args[i] == name {
			return args[i+1]
		}
	}

	return ""
}
|
||||||
|
|
||||||
func loadYaml(name, content string) *bundle.Bundle {
|
func loadYaml(name, content string) *bundle.Bundle {
|
||||||
v, diag := config.LoadFromBytes(name, []byte(content))
|
v, diag := config.LoadFromBytes(name, []byte(content))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue