From 343845545926575611781e9df666629e9d4570f2 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 20 Sep 2024 16:36:43 +0200 Subject: [PATCH] PythonMutator: propagate source locations --- bundle/config/experimental.go | 6 + .../config/mutator/python/python_locations.go | 163 ++++++++++++++++++ .../mutator/python/python_locations_test.go | 127 ++++++++++++++ .../config/mutator/python/python_mutator.go | 80 +++++++-- .../mutator/python/python_mutator_test.go | 78 +++++++-- 5 files changed, 424 insertions(+), 30 deletions(-) create mode 100644 bundle/config/mutator/python/python_locations.go create mode 100644 bundle/config/mutator/python/python_locations_test.go diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 061bbdae..bf5e0190 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -45,6 +45,12 @@ type PyDABs struct { // These packages are imported to discover resources, resource generators, and mutators. // This list can include namespace packages, which causes the import of nested packages. Import []string `json:"import,omitempty"` + + // LoadLocations is a flag to enable loading Python source locations from the PyDABs. + // + // Locations are only supported since PyDABs 0.6.0, and because of that, + // this flag is disabled by default. + LoadLocations bool `json:"load_locations,omitempty"` } type Command string diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go new file mode 100644 index 00000000..97b7e170 --- /dev/null +++ b/bundle/config/mutator/python/python_locations.go @@ -0,0 +1,163 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + "path/filepath" + + "github.com/databricks/cli/libs/dyn" +) + +const generatedFileName = "__generated_by_pydabs__.yml" + +// pythonLocations is data structure for efficient location lookup for a given path +type pythonLocations struct { + // descendants referenced by index, e.g. '.foo' + keys map[string]*pythonLocations + + // descendants referenced by key, e.g. '[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from Python mutator into given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. +func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + if newLocation, ok := findPythonLocation(locations, path); ok { + var newLocations []dyn.Location + + // the first item in the list is the "last" location used for error reporting + newLocations = append(newLocations, newLocation) + + for _, location := range value.Locations() { + if filepath.Base(location.File) == generatedFileName { + continue + } + + // don't add duplicates if location already exists + if location == newLocation { + continue + } + + newLocations = append(newLocations, location) + } + + return value.WithLocations(newLocations), nil + } else { + return value, nil + } + }) +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// locations file is newline-separated JSON objects with pythonLocationEntry structure. +func parsePythonLocations(input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + var currentNode = trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. +func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + var currentNode = locations + var lastLocation = locations.location + var exists = locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 00000000..28cd33de --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,127 @@ +package python + +import ( + "bytes" + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}), + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation was present before, we don't duplicate it, only move it to the beginning + "bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestParsePythonLocations(t *testing.T) { + expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index da6c4d21..38e23291 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -10,6 +10,8 @@ import ( "os" "path/filepath" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -108,7 +110,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + rootPath: b.BundleRootPath, + pythonPath: pythonPath, + loadLocations: experimental.PyDABs.LoadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -152,13 +159,21 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-pydabs") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +type runPythonMutatorOpts struct { + cacheDir string + rootPath string + pythonPath string + loadLocations bool +} + +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -171,6 +186,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -185,7 +204,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.rootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -221,7 +240,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.rootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -266,7 +290,23 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. +func loadLocationsFile(locationsPath string) (*pythonLocations, error) { + if _, err := os.Stat(locationsPath); os.IsNotExist(err) { + return newPythonLocations(), nil + } + + locationsFile, err := os.Open(locationsPath) + if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(locationsFile) +} + +func loadOutputFile(rootPath string, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -282,7 +322,7 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -292,7 +332,25 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 7a419d79..2fcc9b70 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -6,7 +6,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -47,6 +46,7 @@ func TestPythonMutator_load(t *testing.T) { pydabs: enabled: true venv_path: .venv + load_locations: true resources: jobs: job0: @@ -65,7 +65,8 @@ func TestPythonMutator_load(t *testing.T) { "experimental": { "pydabs": { "enabled": true, - "venv_path": ".venv" + "venv_path": ".venv", + "load_locations": true } }, "resources": { @@ -80,6 +81,8 @@ func TestPythonMutator_load(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) mutator := PythonMutator(PythonMutatorPhaseLoad) @@ -97,6 +100,25 @@ func TestPythonMutator_load(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: "src/examples/job1.py", + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, []dyn.Location{ @@ -106,7 +128,6 @@ func TestPythonMutator_load(t *testing.T) { Column: 5, }, }, diags[0].Locations) - } func TestPythonMutator_load_disallowed(t *testing.T) { @@ -146,7 +167,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -191,7 +212,7 @@ func TestPythonMutator_init(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(ctx, b, mutator) @@ -252,7 +273,7 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -588,7 +609,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output string, diagnostics string, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -600,32 +621,51 @@ func withProcessStub(t *testing.T, args []string, output string, diagnostics str inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, []byte(output), 0600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := 0; i < len(args); i++ { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name string, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content))