From a462c9ffe24a6ddcf41885fbcb96fab3f1c9ac24 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Wed, 22 Jan 2025 15:12:03 +0100 Subject: [PATCH] Address CR feedback --- .../config/mutator/python/python_locations.go | 53 +++++++++++------- .../mutator/python/python_locations_test.go | 54 +++++++++++++++++++ .../config/mutator/python/python_mutator.go | 8 +++ 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go index 005ff3f50..2fa86bea0 100644 --- a/bundle/config/mutator/python/python_locations.go +++ b/bundle/config/mutator/python/python_locations.go @@ -23,14 +23,16 @@ const generatedFileName = "__generated_by_python__.yml" // // For example, with locations.json: // -// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5} -// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5} -// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7} +// {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5} +// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5} +// {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7} // -// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 +// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 // -// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, -// because we use the location of the job as the most precise approximation. +// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, +// because we use the location of the job as the most precise approximation. +// +// See pythonLocationEntry for the structure of a single entry in locations.json type pythonLocations struct { // descendants referenced by index, e.g. '.foo' keys map[string]*pythonLocations @@ -64,25 +66,36 @@ func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Valu return value, nil } - var newLocations []dyn.Location - - // the first item in the list is the "last" location used for error reporting - newLocations = append(newLocations, newLocation) - - for _, location := range value.Locations() { - // When loaded, dyn.Value created by Python code uses the virtual file path as their location, - // we replace it with newLocation. - if filepath.Base(location.File) == generatedFileName { - continue - } - - newLocations = append(newLocations, location) - } + // The first item in the list is the "last" location used for error reporting + // + // Loaded YAML uses virtual file path as location, we remove any of such references, + // because they should use 'newLocation' instead. + // + // We preserve any previous non-virtual locations in case when Python function modified + // resource defined in YAML. + newLocations := append( + []dyn.Location{newLocation}, + removeVirtualLocations(value.Locations())..., + ) return value.WithLocations(newLocations), nil }) } +func removeVirtualLocations(locations []dyn.Location) []dyn.Location { + var newLocations []dyn.Location + + for _, location := range locations { + if filepath.Base(location.File) == generatedFileName { + continue + } + + newLocations = append(newLocations, location) + } + + return newLocations +} + // parsePythonLocations parses locations.json from the Python mutator. // // locations file is newline-separated JSON objects with pythonLocationEntry structure. diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go index 2dd2785dd..61a307bf9 100644 --- a/bundle/config/mutator/python/python_locations_test.go +++ b/bundle/config/mutator/python/python_locations_test.go @@ -2,7 +2,10 @@ package python import ( "bytes" + "path" "testing" + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/require" "github.com/databricks/cli/libs/dyn" assert "github.com/databricks/cli/libs/dyn/dynassert" @@ -110,6 +113,57 @@ func TestFindLocation_unknownLocation(t *testing.T) { assert.False(t, exists) } +func TestLoadOutput(t *testing.T) { + location := dyn.Location{File: "my_job.py", Line: 1, Column: 1} + bundleRoot := t.TempDir() + output := `{ + "resources": { + "jobs": { + "my_job": { + "name": "my_job", + "tasks": [ + { + "task_key": "my_task", + "notebook_task": { + "notebook_path": "my_notebook" + } + } + ] + } + } + } + }` + + locations := newPythonLocations() + putPythonLocation( + locations, + dyn.MustPathFromString("resources.jobs.my_job"), + location, + ) + + value, diags := loadOutput( + bundleRoot, + bytes.NewReader([]byte(output)), + locations, + ) + + assert.Equal(t, diag.Diagnostics{}, diags) + + name, err := dyn.Get(value, "resources.jobs.my_job.name") + require.NoError(t, err) + require.Equal(t, []dyn.Location{location}, name.Locations()) + + // until we implement path normalization, we have to keep locations of values + // that change semantic depending on their location + // + // note: it's important to have absolute path including 'bundleRoot' + // because mutator pipeline already has expanded locations into absolute path + notebookPath, err := dyn.Get(value, "resources.jobs.my_job.tasks[0].notebook_task.notebook_path") + require.NoError(t, err) + require.Equal(t, 1, len(notebookPath.Locations())) + require.Equal(t, path.Join(bundleRoot, generatedFileName), notebookPath.Locations()[0].File) +} + func TestParsePythonLocations(t *testing.T) { expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 563ce89da..cd2e286e5 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -402,6 +402,10 @@ func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dy defer outputFile.Close() + return loadOutput(rootPath, outputFile, locations) +} + +func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. // @@ -423,6 +427,10 @@ func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dy // paths are resolved relative to locations of their values, if we change location // we have to update each path, until we simplify that, we don't update locations // for such values, so we don't change how paths are resolved + // + // we can remove this once we: + // - add variable interpolation before and after PythonMutator + // - implement path normalization (aka path normal form) _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { putPythonLocation(locations, p, v.Location()) return v, nil