This commit is contained in:
Gleb Kanterov 2024-10-17 14:19:52 +00:00 committed by GitHub
commit 5f4bb73671
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 440 additions and 31 deletions

View File

@ -45,6 +45,12 @@ type PyDABs struct {
// These packages are imported to discover resources, resource generators, and mutators.
// This list can include namespace packages, which causes the import of nested packages.
Import []string `json:"import,omitempty"`
// LoadLocations is a flag to enable loading Python source locations from the PyDABs.
//
// Locations are only supported since PyDABs 0.6.0, and because of that,
// this flag is disabled by default.
LoadLocations bool `json:"load_locations,omitempty"`
}
type Command string

View File

@ -9,6 +9,7 @@ import (
"github.com/databricks/cli/libs/dyn"
)
// pythonDiagnostic is a single entry in diagnostics.json
type pythonDiagnostic struct {
Severity pythonSeverity `json:"severity"`
Summary string `json:"summary"`

View File

@ -0,0 +1,181 @@
package python
import (
"encoding/json"
"fmt"
"io"
"path/filepath"
"github.com/databricks/cli/libs/dyn"
)
// generatedFileName is used as the virtual file name for YAML generated by PyDABs.
//
// mergePythonLocations replaces dyn.Location with generatedFileName with locations loaded
// from locations.json
const generatedFileName = "__generated_by_pydabs__.yml"
// pythonLocations is data structure for efficient location lookup for a given path
//
// Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path.
// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n)
// where n is the number of components in the path.
//
// For example, with locations.json:
//
// {"path": "resources.jobs.job_0", "file": "src/examples/job_0.py", "line": 3, "column": 5}
// {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "src/examples/job_0.py", "line": 10, "column": 5}
// {"path": "resources.jobs.job_1", "file": "src/examples/job_1.py", "line": 5, "column": 7}
//
// - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5
//
// - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5,
// because we use the location of the job as the most precise approximation.
type pythonLocations struct {
// descendants referenced by index, e.g. '.foo'
keys map[string]*pythonLocations
// descendants referenced by key, e.g. '[0]'
indexes map[int]*pythonLocations
// location for the current node if it exists
location dyn.Location
// if true, location is present
exists bool
}
// pythonLocationEntry is a single entry in locations.json
type pythonLocationEntry struct {
Path string `json:"path"`
File string `json:"file"`
Line int `json:"line"`
Column int `json:"column"`
}
// mergePythonLocations applies locations from Python mutator into given dyn.Value
//
// The primary use-case is to merge locations.json with output.json, so that any
// validation errors will point to Python source code instead of generated YAML.
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) {
newLocation, ok := findPythonLocation(locations, path)
if !ok {
return value, nil
}
var newLocations []dyn.Location
// the first item in the list is the "last" location used for error reporting
newLocations = append(newLocations, newLocation)
for _, location := range value.Locations() {
// When loaded, dyn.Value created by PyDABs use the virtual file path as their location,
// we replace it with newLocation.
if filepath.Base(location.File) == generatedFileName {
continue
}
newLocations = append(newLocations, location)
}
return value.WithLocations(newLocations), nil
})
}
// parsePythonLocations parses locations.json from the Python mutator.
//
// locations file is newline-separated JSON objects with pythonLocationEntry structure.
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
decoder := json.NewDecoder(input)
locations := newPythonLocations()
for decoder.More() {
var entry pythonLocationEntry
err := decoder.Decode(&entry)
if err != nil {
return nil, fmt.Errorf("failed to parse python location: %s", err)
}
path, err := dyn.NewPathFromString(entry.Path)
if err != nil {
return nil, fmt.Errorf("failed to parse python location: %s", err)
}
location := dyn.Location{
File: entry.File,
Line: entry.Line,
Column: entry.Column,
}
putPythonLocation(locations, path, location)
}
return locations, nil
}
// putPythonLocation puts the location to the trie for the given path
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
var currentNode = trie
for _, component := range path {
if key := component.Key(); key != "" {
if _, ok := currentNode.keys[key]; !ok {
currentNode.keys[key] = newPythonLocations()
}
currentNode = currentNode.keys[key]
} else {
index := component.Index()
if _, ok := currentNode.indexes[index]; !ok {
currentNode.indexes[index] = newPythonLocations()
}
currentNode = currentNode.indexes[index]
}
}
currentNode.location = location
currentNode.exists = true
}
// newPythonLocations creates a new trie node
func newPythonLocations() *pythonLocations {
return &pythonLocations{
keys: make(map[string]*pythonLocations),
indexes: make(map[int]*pythonLocations),
}
}
// findPythonLocation finds the location or closest ancestor location in the trie for the given path
// if no ancestor or exact location is found, false is returned.
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
var currentNode = locations
var lastLocation = locations.location
var exists = locations.exists
for _, component := range path {
if key := component.Key(); key != "" {
if _, ok := currentNode.keys[key]; !ok {
break
}
currentNode = currentNode.keys[key]
} else {
index := component.Index()
if _, ok := currentNode.indexes[index]; !ok {
break
}
currentNode = currentNode.indexes[index]
}
if currentNode.exists {
lastLocation = currentNode.location
exists = true
}
}
return lastLocation, exists
}

View File

@ -0,0 +1,124 @@
package python
import (
"bytes"
"testing"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
)
func TestMergeLocations(t *testing.T) {
pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1}
yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation)
input := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
"baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}),
"qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}),
},
[]dyn.Location{},
),
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)
expected := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
// pythonLocation is appended to the beginning of the list if absent
"baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// generatedLocation is replaced by pythonLocation
"qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
},
[]dyn.Location{pythonLocation},
),
// if location is unknown, we keep it as-is
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)
actual, err := mergePythonLocations(input, locations)
assert.NoError(t, err)
assert.Equal(t, expected, actual)
}
func TestFindLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar"))
assert.True(t, exists)
assert.Equal(t, location1, actual)
}
func TestFindLocation_indexPathComponent(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"))
assert.True(t, exists)
assert.Equal(t, location2, actual)
}
func TestFindLocation_closestAncestorLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz"))
assert.True(t, exists)
assert.Equal(t, location1, actual)
}
func TestFindLocation_unknownLocation(t *testing.T) {
location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1}
location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1}
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), location0)
putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1)
_, exists := findPythonLocation(locations, dyn.MustPathFromString("bar"))
assert.False(t, exists)
}
func TestParsePythonLocations(t *testing.T) {
expected := dyn.Location{File: "foo.py", Line: 1, Column: 2}
input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}`
reader := bytes.NewReader([]byte(input))
locations, err := parsePythonLocations(reader)
assert.NoError(t, err)
assert.True(t, locations.keys["foo"].exists)
assert.Equal(t, expected, locations.keys["foo"].location)
}

View File

@ -7,9 +7,12 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/cli/bundle/config/mutator/paths"
"github.com/databricks/databricks-sdk-go/logger"
"github.com/fatih/color"
@ -108,7 +111,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
}
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.BundleRootPath, pythonPath, leftRoot)
rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
cacheDir: cacheDir,
bundleRootPath: b.BundleRootPath,
pythonPath: pythonPath,
loadLocations: experimental.PyDABs.LoadLocations,
})
mutateDiags = diags
if diags.HasError() {
return dyn.InvalidValue, mutateDiagsHasError
@ -152,13 +160,21 @@ func createCacheDir(ctx context.Context) (string, error) {
return os.MkdirTemp("", "-pydabs")
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
type runPythonMutatorOpts struct {
cacheDir string
bundleRootPath string
pythonPath string
loadLocations bool
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(opts.cacheDir, "input.json")
outputPath := filepath.Join(opts.cacheDir, "output.json")
diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json")
locationsPath := filepath.Join(opts.cacheDir, "locations.json")
args := []string{
pythonPath,
opts.pythonPath,
"-m",
"databricks.bundles.build",
"--phase",
@ -171,6 +187,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
diagnosticsPath,
}
if opts.loadLocations {
args = append(args, "--locations", locationsPath)
}
if err := writeInputFile(inputPath, root); err != nil {
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
}
@ -185,7 +205,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
_, processErr := process.Background(
ctx,
args,
process.WithDir(rootPath),
process.WithDir(opts.bundleRootPath),
process.WithStderrWriter(stderrWriter),
process.WithStdoutWriter(stdoutWriter),
)
@ -221,7 +241,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
}
output, outputDiags := loadOutputFile(rootPath, outputPath)
locations, err := loadLocationsFile(locationsPath)
if err != nil {
return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err)
}
output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations)
pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)
// we pass through pythonDiagnostic because it contains warnings
@ -266,7 +291,21 @@ func writeInputFile(inputPath string, input dyn.Value) error {
return os.WriteFile(inputPath, rootConfigJson, 0600)
}
func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) {
// loadLocationsFile loads locations.json containing source locations for generated YAML.
func loadLocationsFile(locationsPath string) (*pythonLocations, error) {
locationsFile, err := os.Open(locationsPath)
if errors.Is(err, fs.ErrNotExist) {
return newPythonLocations(), nil
} else if err != nil {
return nil, fmt.Errorf("failed to open locations file: %w", err)
}
defer locationsFile.Close()
return parsePythonLocations(locationsFile)
}
func loadOutputFile(rootPath string, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
outputFile, err := os.Open(outputPath)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
@ -277,12 +316,12 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos
// we need absolute path because later parts of pipeline assume all paths are absolute
// and this file will be used as location to resolve relative paths.
//
// virtualPath has to stay in rootPath, because locations outside root path are not allowed:
// virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed:
//
// Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path
//
// for that, we pass virtualPath instead of outputPath as file location
virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml"))
virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName))
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
}
@ -292,7 +331,25 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
}
return strictNormalize(config.Root{}, generated)
// paths are resolved relative to locations of their values, if we change location
// we have to update each path, until we simplify that, we don't update locations
// for such values, so we don't change how paths are resolved
_, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
putPythonLocation(locations, p, v.Location())
return v, nil
})
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
// generated has dyn.Location as if it comes from generated YAML file
// earlier we loaded locations.json with source locations in Python code
generatedWithLocations, err := mergePythonLocations(generated, locations)
if err != nil {
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
}
return strictNormalize(config.Root{}, generatedWithLocations)
}
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {

View File

@ -6,7 +6,6 @@ import (
"os"
"os/exec"
"path/filepath"
"reflect"
"runtime"
"testing"
@ -47,6 +46,7 @@ func TestPythonMutator_load(t *testing.T) {
pydabs:
enabled: true
venv_path: .venv
load_locations: true
resources:
jobs:
job0:
@ -65,7 +65,8 @@ func TestPythonMutator_load(t *testing.T) {
"experimental": {
"pydabs": {
"enabled": true,
"venv_path": ".venv"
"venv_path": ".venv",
"load_locations": true
}
},
"resources": {
@ -80,6 +81,8 @@ func TestPythonMutator_load(t *testing.T) {
}
}`,
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
`{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`,
)
mutator := PythonMutator(PythonMutatorPhaseLoad)
@ -97,6 +100,25 @@ func TestPythonMutator_load(t *testing.T) {
assert.Equal(t, "job_1", job1.Name)
}
// output of locations.json should be applied to underlying dyn.Value
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name"))
if err != nil {
return dyn.InvalidValue, err
}
assert.Equal(t, []dyn.Location{
{
File: "src/examples/job1.py",
Line: 5,
Column: 7,
},
}, name1.Locations())
return v, nil
})
assert.NoError(t, err)
assert.Equal(t, 1, len(diags))
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
assert.Equal(t, []dyn.Location{
@ -106,7 +128,6 @@ func TestPythonMutator_load(t *testing.T) {
Column: 5,
},
}, diags[0].Locations)
}
func TestPythonMutator_load_disallowed(t *testing.T) {
@ -146,7 +167,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
@ -191,7 +212,7 @@ func TestPythonMutator_init(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseInit)
diag := bundle.Apply(ctx, b, mutator)
@ -252,7 +273,7 @@ func TestPythonMutator_badOutput(t *testing.T) {
}
}
}
}`, "")
}`, "", "")
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
@ -588,7 +609,7 @@ or activate the environment before running CLI commands:
assert.Equal(t, expected, out)
}
func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context {
func withProcessStub(t *testing.T, args []string, output string, diagnostics string, locations string) context.Context {
ctx := context.Background()
ctx, stub := process.WithStub(ctx)
@ -600,32 +621,51 @@ func withProcessStub(t *testing.T, args []string, output string, diagnostics str
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
locationsPath := filepath.Join(cacheDir, "locations.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
args = append(args, "--input", inputPath)
args = append(args, "--output", outputPath)
args = append(args, "--diagnostics", diagnosticsPath)
stub.WithCallback(func(actual *exec.Cmd) error {
_, err := os.Stat(inputPath)
assert.NoError(t, err)
if reflect.DeepEqual(actual.Args, args) {
err := os.WriteFile(outputPath, []byte(output), 0600)
require.NoError(t, err)
actualInputPath := getArg(actual.Args, "--input")
actualOutputPath := getArg(actual.Args, "--output")
actualDiagnosticsPath := getArg(actual.Args, "--diagnostics")
actualLocationsPath := getArg(actual.Args, "--locations")
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600)
require.NoError(t, err)
require.Equal(t, inputPath, actualInputPath)
require.Equal(t, outputPath, actualOutputPath)
require.Equal(t, diagnosticsPath, actualDiagnosticsPath)
return nil
} else {
return fmt.Errorf("unexpected command: %v", actual.Args)
// locations is an optional argument
if locations != "" {
require.Equal(t, locationsPath, actualLocationsPath)
err = os.WriteFile(locationsPath, []byte(locations), 0600)
require.NoError(t, err)
}
err = os.WriteFile(outputPath, []byte(output), 0600)
require.NoError(t, err)
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600)
require.NoError(t, err)
return nil
})
return ctx
}
func getArg(args []string, name string) string {
for i := 0; i < len(args); i++ {
if args[i] == name {
return args[i+1]
}
}
return ""
}
func loadYaml(name string, content string) *bundle.Bundle {
v, diag := config.LoadFromBytes(name, []byte(content))