From 343845545926575611781e9df666629e9d4570f2 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 20 Sep 2024 16:36:43 +0200 Subject: [PATCH 1/5] PythonMutator: propagate source locations --- bundle/config/experimental.go | 6 + .../config/mutator/python/python_locations.go | 163 ++++++++++++++++++ .../mutator/python/python_locations_test.go | 127 ++++++++++++++ .../config/mutator/python/python_mutator.go | 80 +++++++-- .../mutator/python/python_mutator_test.go | 78 +++++++-- 5 files changed, 424 insertions(+), 30 deletions(-) create mode 100644 bundle/config/mutator/python/python_locations.go create mode 100644 bundle/config/mutator/python/python_locations_test.go diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 061bbdae..bf5e0190 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -45,6 +45,12 @@ type PyDABs struct { // These packages are imported to discover resources, resource generators, and mutators. // This list can include namespace packages, which causes the import of nested packages. Import []string `json:"import,omitempty"` + + // LoadLocations is a flag to enable loading Python source locations from the PyDABs. + // + // Locations are only supported since PyDABs 0.6.0, and because of that, + // this flag is disabled by default. + LoadLocations bool `json:"load_locations,omitempty"` } type Command string diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go new file mode 100644 index 00000000..97b7e170 --- /dev/null +++ b/bundle/config/mutator/python/python_locations.go @@ -0,0 +1,163 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + "path/filepath" + + "github.com/databricks/cli/libs/dyn" +) + +const generatedFileName = "__generated_by_pydabs__.yml" + +// pythonLocations is data structure for efficient location lookup for a given path +type pythonLocations struct { + // descendants referenced by index, e.g. '.foo' + keys map[string]*pythonLocations + + // descendants referenced by key, e.g. '[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from Python mutator into given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. +func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + if newLocation, ok := findPythonLocation(locations, path); ok { + var newLocations []dyn.Location + + // the first item in the list is the "last" location used for error reporting + newLocations = append(newLocations, newLocation) + + for _, location := range value.Locations() { + if filepath.Base(location.File) == generatedFileName { + continue + } + + // don't add duplicates if location already exists + if location == newLocation { + continue + } + + newLocations = append(newLocations, location) + } + + return value.WithLocations(newLocations), nil + } else { + return value, nil + } + }) +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// locations file is newline-separated JSON objects with pythonLocationEntry structure. +func parsePythonLocations(input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + var currentNode = trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. +func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + var currentNode = locations + var lastLocation = locations.location + var exists = locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 00000000..28cd33de --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,127 @@ +package python + +import ( + "bytes" + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}), + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation was present before, we don't duplicate it, only move it to the beginning + "bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestParsePythonLocations(t *testing.T) { + expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index da6c4d21..38e23291 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -10,6 +10,8 @@ import ( "os" "path/filepath" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -108,7 +110,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + rootPath: b.BundleRootPath, + pythonPath: pythonPath, + loadLocations: experimental.PyDABs.LoadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -152,13 +159,21 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-pydabs") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +type runPythonMutatorOpts struct { + cacheDir string + rootPath string + pythonPath string + loadLocations bool +} + +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -171,6 +186,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -185,7 +204,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.rootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -221,7 +240,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.rootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -266,7 +290,23 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. +func loadLocationsFile(locationsPath string) (*pythonLocations, error) { + if _, err := os.Stat(locationsPath); os.IsNotExist(err) { + return newPythonLocations(), nil + } + + locationsFile, err := os.Open(locationsPath) + if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(locationsFile) +} + +func loadOutputFile(rootPath string, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -282,7 +322,7 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -292,7 +332,25 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index 7a419d79..2fcc9b70 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -6,7 +6,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -47,6 +46,7 @@ func TestPythonMutator_load(t *testing.T) { pydabs: enabled: true venv_path: .venv + load_locations: true resources: jobs: job0: @@ -65,7 +65,8 @@ func TestPythonMutator_load(t *testing.T) { "experimental": { "pydabs": { "enabled": true, - "venv_path": ".venv" + "venv_path": ".venv", + "load_locations": true } }, "resources": { @@ -80,6 +81,8 @@ func TestPythonMutator_load(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) mutator := PythonMutator(PythonMutatorPhaseLoad) @@ -97,6 +100,25 @@ func TestPythonMutator_load(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: "src/examples/job1.py", + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, []dyn.Location{ @@ -106,7 +128,6 @@ func TestPythonMutator_load(t *testing.T) { Column: 5, }, }, diags[0].Locations) - } func TestPythonMutator_load_disallowed(t *testing.T) { @@ -146,7 +167,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -191,7 +212,7 @@ func TestPythonMutator_init(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(ctx, b, mutator) @@ -252,7 +273,7 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -588,7 +609,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output string, diagnostics string, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -600,32 +621,51 @@ func withProcessStub(t *testing.T, args []string, output string, diagnostics str inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, []byte(output), 0600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := 0; i < len(args); i++ { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name string, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content)) From df613759951bde518c40152fb806662a70271e79 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 8 Oct 2024 10:14:38 +0200 Subject: [PATCH 2/5] Address CR comments --- .../mutator/python/python_diagnostics.go | 1 + .../config/mutator/python/python_locations.go | 27 +++++++++++++++---- .../mutator/python/python_locations_test.go | 3 --- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/bundle/config/mutator/python/python_diagnostics.go b/bundle/config/mutator/python/python_diagnostics.go index 12822065..7a1e13b4 100644 --- a/bundle/config/mutator/python/python_diagnostics.go +++ b/bundle/config/mutator/python/python_diagnostics.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/libs/dyn" ) +// pythonDiagnostic is a single entry in diagnostics.json type pythonDiagnostic struct { Severity pythonSeverity `json:"severity"` Summary string `json:"summary"` diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go index 97b7e170..eb6698b4 100644 --- a/bundle/config/mutator/python/python_locations.go +++ b/bundle/config/mutator/python/python_locations.go @@ -9,9 +9,28 @@ import ( "github.com/databricks/cli/libs/dyn" ) +// generatedFileName is used as the virtual file name for YAML generated by PyDABs. +// +// mergePythonLocations replaces dyn.Location with generatedFileName with locations loaded +// from locations.json const generatedFileName = "__generated_by_pydabs__.yml" // pythonLocations is data structure for efficient location lookup for a given path +// +// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path. +// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n) +// where n is the number of components in the path. +// +// For example, with locations.json: +// +// {"path": "resources.jobs.job_0", "file": "src/examples/job_0.py", "line": 3, "column": 5} +// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "src/examples/job_0.py", "line": 10, "column": 5} +// {"path": "resources.jobs.job_1", "file": "src/examples/job_1.py", "line": 5, "column": 7} +// +// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 +// +// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, +// because we use the location of the job as the most precise approximation. type pythonLocations struct { // descendants referenced by index, e.g. '.foo' keys map[string]*pythonLocations @@ -26,6 +45,7 @@ type pythonLocations struct { exists bool } +// pythonLocationEntry is a single entry in locations.json type pythonLocationEntry struct { Path string `json:"path"` File string `json:"file"` @@ -46,15 +66,12 @@ func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Valu newLocations = append(newLocations, newLocation) for _, location := range value.Locations() { + // When loaded, dyn.Value created by PyDABs use the virtual file path as their location, + // we replace it with newLocation. if filepath.Base(location.File) == generatedFileName { continue } - // don't add duplicates if location already exists - if location == newLocation { - continue - } - newLocations = append(newLocations, location) } diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go index 28cd33de..2dd2785d 100644 --- a/bundle/config/mutator/python/python_locations_test.go +++ b/bundle/config/mutator/python/python_locations_test.go @@ -20,7 +20,6 @@ func TestMergeLocations(t *testing.T) { map[string]dyn.Value{ "foo": dyn.NewValue( map[string]dyn.Value{ - "bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}), "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), }, @@ -35,8 +34,6 @@ func TestMergeLocations(t *testing.T) { map[string]dyn.Value{ "foo": dyn.NewValue( map[string]dyn.Value{ - // pythonLocation was present before, we don't duplicate it, only move it to the beginning - "bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), // pythonLocation is appended to the beginning of the list if absent "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), // generatedLocation is replaced by pythonLocation From 43ce278299a71da4f091a820a7d1a31ad22af967 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 8 Oct 2024 10:18:52 +0200 Subject: [PATCH 3/5] Rename bundle root path --- .../config/mutator/python/python_mutator.go | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 38e23291..e259f372 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -111,10 +111,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno } rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ - cacheDir: cacheDir, - rootPath: b.BundleRootPath, - pythonPath: pythonPath, - loadLocations: experimental.PyDABs.LoadLocations, + cacheDir: cacheDir, + bundleRootPath: b.BundleRootPath, + pythonPath: pythonPath, + loadLocations: experimental.PyDABs.LoadLocations, }) mutateDiags = diags if diags.HasError() { @@ -160,10 +160,10 @@ func createCacheDir(ctx context.Context) (string, error) { } type runPythonMutatorOpts struct { - cacheDir string - rootPath string - pythonPath string - loadLocations bool + cacheDir string + bundleRootPath string + pythonPath string + loadLocations bool } func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { @@ -204,7 +204,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, op _, processErr := process.Background( ctx, args, - process.WithDir(opts.rootPath), + process.WithDir(opts.bundleRootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -245,7 +245,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, op return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) } - output, outputDiags := loadOutputFile(opts.rootPath, outputPath, locations) + output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -317,7 +317,7 @@ func loadOutputFile(rootPath string, outputPath string, locations *pythonLocatio // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. // - // virtualPath has to stay in rootPath, because locations outside root path are not allowed: + // virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed: // // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path // From bfb13afa8e09526fcebfe65f81289dc5f9061d5e Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 8 Oct 2024 10:26:53 +0200 Subject: [PATCH 4/5] Address more feedback --- .../config/mutator/python/python_locations.go | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go index eb6698b4..0551edac 100644 --- a/bundle/config/mutator/python/python_locations.go +++ b/bundle/config/mutator/python/python_locations.go @@ -59,26 +59,27 @@ type pythonLocationEntry struct { // validation errors will point to Python source code instead of generated YAML. func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { - if newLocation, ok := findPythonLocation(locations, path); ok { - var newLocations []dyn.Location - - // the first item in the list is the "last" location used for error reporting - newLocations = append(newLocations, newLocation) - - for _, location := range value.Locations() { - // When loaded, dyn.Value created by PyDABs use the virtual file path as their location, - // we replace it with newLocation. - if filepath.Base(location.File) == generatedFileName { - continue - } - - newLocations = append(newLocations, location) - } - - return value.WithLocations(newLocations), nil - } else { + newLocation, ok := findPythonLocation(locations, path) + if !ok { return value, nil } + + var newLocations []dyn.Location + + // the first item in the list is the "last" location used for error reporting + newLocations = append(newLocations, newLocation) + + for _, location := range value.Locations() { + // When loaded, dyn.Value created by PyDABs use the virtual file path as their location, + // we replace it with newLocation. + if filepath.Base(location.File) == generatedFileName { + continue + } + + newLocations = append(newLocations, location) + } + + return value.WithLocations(newLocations), nil }) } From 96a6cef0d6a697172b707d8aae85c0fcbdd69566 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 8 Oct 2024 10:28:51 +0200 Subject: [PATCH 5/5] Address feedbacK --- bundle/config/mutator/python/python_mutator.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index e259f372..7ee35da2 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "io/fs" "os" "path/filepath" @@ -292,12 +293,10 @@ func writeInputFile(inputPath string, input dyn.Value) error { // loadLocationsFile loads locations.json containing source locations for generated YAML. func loadLocationsFile(locationsPath string) (*pythonLocations, error) { - if _, err := os.Stat(locationsPath); os.IsNotExist(err) { - return newPythonLocations(), nil - } - locationsFile, err := os.Open(locationsPath) - if err != nil { + if errors.Is(err, fs.ErrNotExist) { + return newPythonLocations(), nil + } else if err != nil { return nil, fmt.Errorf("failed to open locations file: %w", err) }