PythonMutator: add diagnostics (#1531)

## Changes
Allow PyDABs to report `dyn.Diagnostics` by writing to
`diagnostics.json` supplied as an argument, similar to `input.json` and
`output.json`

Such errors are not yet properly printed in `databricks bundle
validate`, which will be fixed in a follow-up PR.

## Tests
Unit tests
This commit is contained in:
Gleb Kanterov 2024-07-02 17:10:53 +02:00 committed by GitHub
parent 3d2f7622bc
commit 5a0a6d7334
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 334 additions and 35 deletions

View File

@ -0,0 +1,97 @@
package python
import (
"encoding/json"
"fmt"
"io"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)
type pythonDiagnostic struct {
Severity pythonSeverity `json:"severity"`
Summary string `json:"summary"`
Detail string `json:"detail,omitempty"`
Location pythonDiagnosticLocation `json:"location,omitempty"`
Path string `json:"path,omitempty"`
}
type pythonDiagnosticLocation struct {
File string `json:"file"`
Line int `json:"line"`
Column int `json:"column"`
}
type pythonSeverity = string
const (
pythonError pythonSeverity = "error"
pythonWarning pythonSeverity = "warning"
)
// parsePythonDiagnostics parses diagnostics from the Python mutator.
//
// diagnostics file is newline-separated JSON objects with pythonDiagnostic structure.
func parsePythonDiagnostics(input io.Reader) (diag.Diagnostics, error) {
diags := diag.Diagnostics{}
decoder := json.NewDecoder(input)
for decoder.More() {
var parsedLine pythonDiagnostic
err := decoder.Decode(&parsedLine)
if err != nil {
return nil, fmt.Errorf("failed to parse diags: %s", err)
}
severity, err := convertPythonSeverity(parsedLine.Severity)
if err != nil {
return nil, fmt.Errorf("failed to parse severity: %s", err)
}
path, err := convertPythonPath(parsedLine.Path)
if err != nil {
return nil, fmt.Errorf("failed to parse path: %s", err)
}
diag := diag.Diagnostic{
Severity: severity,
Summary: parsedLine.Summary,
Detail: parsedLine.Detail,
Location: convertPythonLocation(parsedLine.Location),
Path: path,
}
diags = diags.Append(diag)
}
return diags, nil
}
func convertPythonPath(path string) (dyn.Path, error) {
if path == "" {
return nil, nil
}
return dyn.NewPathFromString(path)
}
func convertPythonSeverity(severity pythonSeverity) (diag.Severity, error) {
switch severity {
case pythonError:
return diag.Error, nil
case pythonWarning:
return diag.Warning, nil
default:
return 0, fmt.Errorf("unexpected value: %s", severity)
}
}
func convertPythonLocation(location pythonDiagnosticLocation) dyn.Location {
return dyn.Location{
File: location.File,
Line: location.Line,
Column: location.Column,
}
}

View File

@ -0,0 +1,107 @@
package python
import (
"bytes"
"testing"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
)
func TestConvertPythonLocation(t *testing.T) {
location := convertPythonLocation(pythonDiagnosticLocation{
File: "src/examples/file.py",
Line: 1,
Column: 2,
})
assert.Equal(t, dyn.Location{
File: "src/examples/file.py",
Line: 1,
Column: 2,
}, location)
}
type parsePythonDiagnosticsTest struct {
name string
input string
expected diag.Diagnostics
}
func TestParsePythonDiagnostics(t *testing.T) {
testCases := []parsePythonDiagnosticsTest{
{
name: "short error with location",
input: `{"severity": "error", "summary": "error summary", "location": {"file": "src/examples/file.py", "line": 1, "column": 2}}`,
expected: diag.Diagnostics{
{
Severity: diag.Error,
Summary: "error summary",
Location: dyn.Location{
File: "src/examples/file.py",
Line: 1,
Column: 2,
},
},
},
},
{
name: "short error with path",
input: `{"severity": "error", "summary": "error summary", "path": "resources.jobs.job0.name"}`,
expected: diag.Diagnostics{
{
Severity: diag.Error,
Summary: "error summary",
Path: dyn.MustPathFromString("resources.jobs.job0.name"),
},
},
},
{
name: "empty file",
input: "",
expected: diag.Diagnostics{},
},
{
name: "newline file",
input: "\n",
expected: diag.Diagnostics{},
},
{
name: "warning with detail",
input: `{"severity": "warning", "summary": "warning summary", "detail": "warning detail"}`,
expected: diag.Diagnostics{
{
Severity: diag.Warning,
Summary: "warning summary",
Detail: "warning detail",
},
},
},
{
name: "multiple errors",
input: `{"severity": "error", "summary": "error summary (1)"}` + "\n" +
`{"severity": "error", "summary": "error summary (2)"}`,
expected: diag.Diagnostics{
{
Severity: diag.Error,
Summary: "error summary (1)",
},
{
Severity: diag.Error,
Summary: "error summary (2)",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
diags, err := parsePythonDiagnostics(bytes.NewReader([]byte(tc.input)))
assert.NoError(t, err)
assert.Equal(t, tc.expected, diags)
})
}
}

View File

@ -3,11 +3,14 @@ package python
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"github.com/databricks/databricks-sdk-go/logger"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/bundle"
@ -87,6 +90,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set")
}
// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics'
var mutateDiags diag.Diagnostics
var mutateDiagsHasError = errors.New("unexpected error")
err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) {
pythonPath := interpreterPath(experimental.PyDABs.VEnvPath)
@ -103,9 +110,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
}
rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
if err != nil {
return dyn.InvalidValue, err
rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
mutateDiags = diags
if diags.HasError() {
return dyn.InvalidValue, mutateDiagsHasError
}
visitor, err := createOverrideVisitor(ctx, m.phase)
@ -116,7 +124,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
return merge.Override(leftRoot, rightRoot, visitor)
})
return diag.FromErr(err)
if err == mutateDiagsHasError {
if !mutateDiags.HasError() {
panic("mutateDiags has no error, but error is expected")
}
return mutateDiags
}
return mutateDiags.Extend(diag.FromErr(err))
}
func createCacheDir(ctx context.Context) (string, error) {
@ -138,9 +154,10 @@ func createCacheDir(ctx context.Context) (string, error) {
return os.MkdirTemp("", "-pydabs")
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) {
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
args := []string{
pythonPath,
@ -152,42 +169,77 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
inputPath,
"--output",
outputPath,
"--diagnostics",
diagnosticsPath,
}
// we need to marshal dyn.Value instead of bundle.Config to JSON to support
// non-string fields assigned with bundle variables
rootConfigJson, err := json.Marshal(root.AsAny())
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err)
}
err = os.WriteFile(inputPath, rootConfigJson, 0600)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err)
if err := writeInputFile(inputPath, root); err != nil {
return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
}
stderrWriter := newLogWriter(ctx, "stderr: ")
stdoutWriter := newLogWriter(ctx, "stdout: ")
_, err = process.Background(
_, processErr := process.Background(
ctx,
args,
process.WithDir(rootPath),
process.WithStderrWriter(stderrWriter),
process.WithStdoutWriter(stdoutWriter),
)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err)
if processErr != nil {
logger.Debugf(ctx, "python mutator process failed: %s", processErr)
}
pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath)
if pythonDiagnosticsErr != nil {
logger.Debugf(ctx, "failed to load diagnostics: %s", pythonDiagnosticsErr)
}
// if diagnostics file exists, it gives the most descriptive errors
// if there is any error, we treat it as fatal error, and stop processing
if pythonDiagnostics.HasError() {
return dyn.InvalidValue, pythonDiagnostics
}
// process can fail without reporting errors in diagnostics file or creating it, for instance,
// venv doesn't have PyDABs library installed
if processErr != nil {
return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr)
}
// or we can fail to read diagnostics file, that should always be created
if pythonDiagnosticsErr != nil {
return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
}
output, err := loadOutputFile(rootPath, outputPath)
if err != nil {
return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err)
}
// we pass through pythonDiagnostic because it contains warnings
return output, pythonDiagnostics
}
func writeInputFile(inputPath string, input dyn.Value) error {
// we need to marshal dyn.Value instead of bundle.Config to JSON to support
// non-string fields assigned with bundle variables
rootConfigJson, err := json.Marshal(input.AsAny())
if err != nil {
return fmt.Errorf("failed to marshal input: %w", err)
}
return os.WriteFile(inputPath, rootConfigJson, 0600)
}
func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) {
outputFile, err := os.Open(outputPath)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err)
return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err)
}
defer func() {
_ = outputFile.Close()
}()
defer outputFile.Close()
// we need absolute path because later parts of pipeline assume all paths are absolute
// and this file will be used as location to resolve relative paths.
@ -204,24 +256,43 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
generated, err := yamlloader.LoadYAML(virtualPath, outputFile)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err)
return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err)
}
normalized, diagnostic := convert.Normalize(config.Root{}, generated)
if diagnostic.Error() != nil {
return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %w", diagnostic.Error())
return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error())
}
// warnings shouldn't happen because output should be already normalized
// when it happens, it's a bug in the mutator, and should be treated as an error
for _, d := range diagnostic.Filter(diag.Warning) {
return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %s", d.Summary)
return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary)
}
return normalized, nil
}
// loadDiagnosticsFile loads diagnostics from a file.
//
// It contains a list of warnings and errors that we should print to users.
//
// If the file doesn't exist, we return an error. We expect the file to always be
// created by the Python mutator, and it's absence means there are integration problems,
// and the diagnostics file was lost. If we treat non-existence as an empty diag.Diagnostics
// we risk loosing errors and warnings.
func loadDiagnosticsFile(path string) (diag.Diagnostics, error) {
file, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open diagnostics file: %w", err)
}
defer file.Close()
return parsePythonDiagnostics(file)
}
func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) {
switch phase {
case PythonMutatorPhaseLoad:

View File

@ -74,12 +74,14 @@ func TestPythonMutator_load(t *testing.T) {
},
}
}
}`)
}`,
`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
diags := bundle.Apply(ctx, b, mutator)
assert.NoError(t, diag.Error())
assert.NoError(t, diags.Error())
assert.ElementsMatch(t, []string{"job0", "job1"}, maps.Keys(b.Config.Resources.Jobs))
@ -90,6 +92,14 @@ func TestPythonMutator_load(t *testing.T) {
if job1, ok := b.Config.Resources.Jobs["job1"]; ok {
assert.Equal(t, "job_1", job1.Name)
}
assert.Equal(t, 1, len(diags))
assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
assert.Equal(t, dyn.Location{
File: "src/examples/file.py",
Line: 10,
Column: 5,
}, diags[0].Location)
}
func TestPythonMutator_load_disallowed(t *testing.T) {
@ -129,7 +139,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) {
}
}
}
}`)
}`, "")
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
@ -174,7 +184,7 @@ func TestPythonMutator_init(t *testing.T) {
}
}
}
}`)
}`, "")
mutator := PythonMutator(PythonMutatorPhaseInit)
diag := bundle.Apply(ctx, b, mutator)
@ -235,12 +245,12 @@ func TestPythonMutator_badOutput(t *testing.T) {
}
}
}
}`)
}`, "")
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.EqualError(t, diag.Error(), "failed to normalize Python mutator output: unknown field: unknown_property")
assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property")
}
func TestPythonMutator_disabled(t *testing.T) {
@ -409,6 +419,13 @@ func TestCreateOverrideVisitor(t *testing.T) {
}
}
func TestLoadDiagnosticsFile_nonExistent(t *testing.T) {
// this is an important behaviour, see loadDiagnosticsFile docstring
_, err := loadDiagnosticsFile("non_existent_file.json")
assert.Error(t, err)
}
func TestInterpreterPath(t *testing.T) {
if runtime.GOOS == "windows" {
assert.Equal(t, "venv\\Scripts\\python3.exe", interpreterPath("venv"))
@ -417,7 +434,7 @@ func TestInterpreterPath(t *testing.T) {
}
}
func withProcessStub(t *testing.T, args []string, stdout string) context.Context {
func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context {
ctx := context.Background()
ctx, stub := process.WithStub(ctx)
@ -429,17 +446,24 @@ func withProcessStub(t *testing.T, args []string, stdout string) context.Context
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")
args = append(args, "--input", inputPath)
args = append(args, "--output", outputPath)
args = append(args, "--diagnostics", diagnosticsPath)
stub.WithCallback(func(actual *exec.Cmd) error {
_, err := os.Stat(inputPath)
assert.NoError(t, err)
if reflect.DeepEqual(actual.Args, args) {
err := os.WriteFile(outputPath, []byte(stdout), 0600)
return err
err := os.WriteFile(outputPath, []byte(output), 0600)
require.NoError(t, err)
err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600)
require.NoError(t, err)
return nil
} else {
return fmt.Errorf("unexpected command: %v", actual.Args)
}