PythonMutator: propagate source locations

This commit is contained in:
Gleb Kanterov 2024-09-20 16:36:43 +02:00
parent 667302b61b
commit d0f0012204
No known key found for this signature in database
GPG Key ID: 4D87C640DBD00176
5 changed files with 422 additions and 29 deletions

View File

@ -69,6 +69,12 @@ type PyDABs struct {
// These packages are imported to discover resources, resource generators, and mutators.
// This list can include namespace packages, which causes the import of nested packages.
Import []string `json:"import,omitempty"`
// LoadLocations is a flag to enable loading Python source locations from the PyDABs.
//
// Locations are only supported since PyDABs 0.6.0, and because of that,
// this flag is disabled by default.
LoadLocations bool `json:"load_locations,omitempty"`
}
type (

View File

@ -0,0 +1,163 @@
package python
import (
"encoding/json"
"fmt"
"io"
"path/filepath"
"github.com/databricks/cli/libs/dyn"
)
const generatedFileName = "__generated_by_pydabs__.yml"
// pythonLocations is data structure for efficient location lookup for a given path
type pythonLocations struct {
// descendants referenced by index, e.g. '.foo'
keys map[string]*pythonLocations
// descendants referenced by key, e.g. '[0]'
indexes map[int]*pythonLocations
// location for the current node if it exists
location dyn.Location
// if true, location is present
exists bool
}
type pythonLocationEntry struct {
Path string `json:"path"`
File string `json:"file"`
Line int `json:"line"`
Column int `json:"column"`
}
// mergePythonLocations applies locations from Python mutator into given dyn.Value
//
// The primary use-case is to merge locations.json with output.json, so that any
// validation errors will point to Python source code instead of generated YAML.
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) {
if newLocation, ok := findPythonLocation(locations, path); ok {
var newLocations []dyn.Location
// the first item in the list is the "last" location used for error reporting
newLocations = append(newLocations, newLocation)
for _, location := range value.Locations() {
if filepath.Base(location.File) == generatedFileName {
continue
}
// don't add duplicates if location already exists
if location == newLocation {
continue
}
newLocations = append(newLocations, location)
}
return value.WithLocations(newLocations), nil
} else {
return value, nil
}
})
}
// parsePythonLocations parses locations.json from the Python mutator.
//
// locations file is newline-separated JSON objects with pythonLocationEntry structure.
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
decoder := json.NewDecoder(input)
locations := newPythonLocations()
for decoder.More() {
var entry pythonLocationEntry
err := decoder.Decode(&entry)
if err != nil {
return nil, fmt.Errorf("failed to parse python location: %s", err)
}
path, err := dyn.NewPathFromString(entry.Path)
if err != nil {
return nil, fmt.Errorf("failed to parse python location: %s", err)
}
location := dyn.Location{
File: entry.File,
Line: entry.Line,
Column: entry.Column,
}
putPythonLocation(locations, path, location)
}
return locations, nil
}
// putPythonLocation puts the location to the trie for the given path
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
var currentNode = trie
for _, component := range path {
if key := component.Key(); key != "" {
if _, ok := currentNode.keys[key]; !ok {
currentNode.keys[key] = newPythonLocations()
}
currentNode = currentNode.keys[key]
} else {
index := component.Index()
if _, ok := currentNode.indexes[index]; !ok {
currentNode.indexes[index] = newPythonLocations()
}
currentNode = currentNode.indexes[index]
}
}
currentNode.location = location
currentNode.exists = true
}
// newPythonLocations creates a new trie node
func newPythonLocations() *pythonLocations {
return &pythonLocations{
keys: make(map[string]*pythonLocations),
indexes: make(map[int]*pythonLocations),
}
}
// findPythonLocation finds the location or closest ancestor location in the trie for the given path
// if no ancestor or exact location is found, false is returned.
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
var currentNode = locations
var lastLocation = locations.location
var exists = locations.exists
for _, component := range path {
if key := component.Key(); key != "" {
if _, ok := currentNode.keys[key]; !ok {
break
}
currentNode = currentNode.keys[key]
} else {
index := component.Index()
if _, ok := currentNode.indexes[index]; !ok {
break
}
currentNode = currentNode.indexes[index]
}
if currentNode.exists {
lastLocation = currentNode.location
exists = true
}
}
return lastLocation, exists
}

View File

@ -0,0 +1,127 @@
package python
import (
"bytes"
"testing"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
)
func TestMergeLocations(t *testing.T) {
pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1}
yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation)
input := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
"bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}),
"baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}),
"qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}),
},
[]dyn.Location{},
),
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)
expected := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
// pythonLocation was present before, we don't duplicate it, only move it to the beginning
"bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// pythonLocation is appended to the beginning of the list if absent
"baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// generatedLocation is replaced by pythonLocation
"qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
},
[]dyn.Location{pythonLocation},
),
// if location is unknown, we keep it as-is
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)
actual, err := mergePythonLocations(input, locations)
assert.NoError(t, err)
assert.Equal(t, expected, actual)
}
func TestFindLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar"))
assert.True(t, exists)
assert.Equal(t, location1, actual)
}
func TestFindLocation_indexPathComponent(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"))
assert.True(t, exists)
assert.Equal(t, location2, actual)
}
func TestFindLocation_closestAncestorLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz"))
assert.True(t, exists)
assert.Equal(t, location1, actual)
}
func TestFindLocation_unknownLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
_, exists := findPythonLocation(locations, dyn.MustPathFromString("bar"))
assert.False(t, exists)
}
func TestParsePythonLocations(t *testing.T) {
expected := dyn.Location{File: "foo.py", Line: 1, Column: 2}
input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}`
reader := bytes.NewReader([]byte(input))
locations, err := parsePythonLocations(reader)
assert.NoError(t, err)
assert.True(t, locations.keys["foo"].exists)
assert.Equal(t, expected, locations.keys["foo"].location)
}

View File

@ -12,6 +12,8 @@ import (
"reflect"
"strings"
"github.com/databricks/cli/bundle/config/mutator/paths"
"github.com/databricks/databricks-sdk-go/logger"
"github.com/fatih/color"
@ -194,7 +196,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
}
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot)
rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
cacheDir: cacheDir,
rootPath: b.BundleRootPath,
pythonPath: pythonPath,
loadLocations: experimental.PyDABs.LoadLocations,
})
mutateDiags = diags
if diags.HasError() {
return dyn.InvalidValue, mutateDiagsHasError
@ -238,13 +245,21 @@ func createCacheDir(ctx context.Context) (string, error) {
return os.MkdirTemp("", "-python")
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
type runPythonMutatorOpts struct {
cacheDir string
rootPath string
pythonPath string
loadLocations bool
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(opts.cacheDir, "input.json")
outputPath := filepath.Join(opts.cacheDir, "output.json")
diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json")
locationsPath := filepath.Join(opts.cacheDir, "locations.json")
args := []string{
pythonPath,
opts.pythonPath,
"-m",
"databricks.bundles.build",
"--phase",
@ -257,6 +272,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
diagnosticsPath,
}
if opts.loadLocations {
args = append(args, "--locations", locationsPath)
}
if err := writeInputFile(inputPath, root); err != nil {
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
}
@ -271,7 +290,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
_, processErr := process.Background(
ctx,
args,
process.WithDir(rootPath),
process.WithDir(opts.rootPath),
process.WithStderrWriter(stderrWriter),
process.WithStdoutWriter(stdoutWriter),
)
@ -307,7 +326,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir, rootPath
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
}
output, outputDiags := loadOutputFile(rootPath, outputPath)
locations, err := loadLocationsFile(locationsPath)
if err != nil {
return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err)
}
output, outputDiags := loadOutputFile(opts.rootPath, outputPath, locations)
pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)
// we pass through pythonDiagnostic because it contains warnings
@ -351,7 +375,23 @@ func writeInputFile(inputPath string, input dyn.Value) error {
return os.WriteFile(inputPath, rootConfigJson, 0o600)
}
func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
// loadLocationsFile loads locations.json containing source locations for generated YAML.
func loadLocationsFile(locationsPath string) (*pythonLocations, error) {
if _, err := os.Stat(locationsPath); os.IsNotExist(err) {
return newPythonLocations(), nil
}
locationsFile, err := os.Open(locationsPath)
if err != nil {
return nil, fmt.Errorf("failed to open locations file: %w", err)
}
defer locationsFile.Close()
return parsePythonLocations(locationsFile)
}
func loadOutputFile(rootPath string, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
outputFile, err := os.Open(outputPath)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
@ -367,7 +407,7 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
// Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path
//
// for that, we pass virtualPath instead of outputPath as file location
virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_python__.yml"))
virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName))
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
}
@ -377,7 +417,25 @@ func loadOutputFile(rootPath, outputPath string) (dyn.Value, diag.Diagnostics) {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
}
return strictNormalize(config.Root{}, generated)
// paths are resolved relative to locations of their values, if we change location
// we have to update each path, until we simplify that, we don't update locations
// for such values, so we don't change how paths are resolved
_, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
putPythonLocation(locations, p, v.Location())
return v, nil
})
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
// generated has dyn.Location as if it comes from generated YAML file
// earlier we loaded locations.json with source locations in Python code
generatedWithLocations, err := mergePythonLocations(generated, locations)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
return strictNormalize(config.Root{}, generatedWithLocations)
}
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {

View File

@ -7,7 +7,6 @@ import (
"os"
"os/exec"
"path/filepath"
"reflect"
"runtime"
"testing"
@ -93,6 +92,8 @@ func TestPythonMutator_loadResources(t *testing.T) {
}
}`,
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
`{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`,
)
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
@ -110,6 +111,25 @@ func TestPythonMutator_loadResources(t *testing.T) {
assert.Equal(t, "job_1", job1.Name)
}
// output of locations.json should be applied to underlying dyn.Value
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name"))
if err != nil {
return dyn.InvalidValue, err
}
assert.Equal(t, []dyn.Location{
{
File: "src/examples/job1.py",
Line: 5,
Column: 7,
},
}, name1.Locations())
return v, nil
})
assert.NoError(t, err)
assert.Equal(t, 1, len(diags))
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
assert.Equal(t, []dyn.Location{
@ -157,7 +177,7 @@ func TestPythonMutator_loadResources_disallowed(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
diag := bundle.Apply(ctx, b, mutator)
@ -202,7 +222,7 @@ func TestPythonMutator_applyMutators(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseApplyMutators)
diag := bundle.Apply(ctx, b, mutator)
@ -263,7 +283,7 @@ func TestPythonMutator_badOutput(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoadResources)
diag := bundle.Apply(ctx, b, mutator)
@ -661,7 +681,7 @@ or activate the environment before running CLI commands:
assert.Equal(t, expected, out)
}
func withProcessStub(t *testing.T, args []string, output, diagnostics string) context.Context {
func withProcessStub(t *testing.T, args []string, output string, diagnostics string, locations string) context.Context {
ctx := context.Background()
ctx, stub := process.WithStub(ctx)
@ -673,33 +693,52 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics string) co
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
locationsPath := filepath.Join(cacheDir, "locations.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
args = append(args, "--input", inputPath)
args = append(args, "--output", outputPath)
args = append(args, "--diagnostics", diagnosticsPath)
stub.WithCallback(func(actual *exec.Cmd) error {
_, err := os.Stat(inputPath)
assert.NoError(t, err)
if reflect.DeepEqual(actual.Args, args) {
err := os.WriteFile(outputPath, []byte(output), 0o600)
require.NoError(t, err)
actualInputPath := getArg(actual.Args, "--input")
actualOutputPath := getArg(actual.Args, "--output")
actualDiagnosticsPath := getArg(actual.Args, "--diagnostics")
actualLocationsPath := getArg(actual.Args, "--locations")
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
require.NoError(t, err)
require.Equal(t, inputPath, actualInputPath)
require.Equal(t, outputPath, actualOutputPath)
require.Equal(t, diagnosticsPath, actualDiagnosticsPath)
return nil
} else {
return fmt.Errorf("unexpected command: %v", actual.Args)
// locations is an optional argument
if locations != "" {
require.Equal(t, locationsPath, actualLocationsPath)
err = os.WriteFile(locationsPath, []byte(locations), 0o600)
require.NoError(t, err)
}
err = os.WriteFile(outputPath, []byte(output), 0o600)
require.NoError(t, err)
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0o600)
require.NoError(t, err)
return nil
})
return ctx
}
func loadYaml(name, content string) *bundle.Bundle {
func getArg(args []string, name string) string {
for i := 0; i < len(args); i++ {
if args[i] == name {
return args[i+1]
}
}
return ""
}
func loadYaml(name string, content string) *bundle.Bundle {
v, diag := config.LoadFromBytes(name, []byte(content))
if diag.Error() != nil {