PythonMutator: add diagnostics (#1531)

## Changes Allow PyDABs to report `dyn.Diagnostics` by writing to `diagnostics.json` supplied as an argument, similar to `input.json` and `output.json` Such errors are not yet properly printed in `databricks bundle validate`, which will be fixed in a follow-up PR. ## Tests Unit tests
2024-07-02 17:10:53 +02:00 · 2024-07-02 17:10:53 +02:00 · 5a0a6d7334
parent 3d2f7622bc
commit 5a0a6d7334
4 changed files with 334 additions and 35 deletions
--- a/bundle/config/mutator/python/python_diagnostics.go
+++ b/bundle/config/mutator/python/python_diagnostics.go
@ -0,0 +1,97 @@
+package python
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+)
+
+type pythonDiagnostic struct {
+	Severity pythonSeverity           `json:"severity"`
+	Summary  string                   `json:"summary"`
+	Detail   string                   `json:"detail,omitempty"`
+	Location pythonDiagnosticLocation `json:"location,omitempty"`
+	Path     string                   `json:"path,omitempty"`
+}
+
+type pythonDiagnosticLocation struct {
+	File   string `json:"file"`
+	Line   int    `json:"line"`
+	Column int    `json:"column"`
+}
+
+type pythonSeverity = string
+
+const (
+	pythonError   pythonSeverity = "error"
+	pythonWarning pythonSeverity = "warning"
+)
+
+// parsePythonDiagnostics parses diagnostics from the Python mutator.
+//
+// diagnostics file is newline-separated JSON objects with pythonDiagnostic structure.
+func parsePythonDiagnostics(input io.Reader) (diag.Diagnostics, error) {
+	diags := diag.Diagnostics{}
+	decoder := json.NewDecoder(input)
+
+	for decoder.More() {
+		var parsedLine pythonDiagnostic
+
+		err := decoder.Decode(&parsedLine)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse diags: %s", err)
+		}
+
+		severity, err := convertPythonSeverity(parsedLine.Severity)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse severity: %s", err)
+		}
+
+		path, err := convertPythonPath(parsedLine.Path)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse path: %s", err)
+		}
+
+		diag := diag.Diagnostic{
+			Severity: severity,
+			Summary:  parsedLine.Summary,
+			Detail:   parsedLine.Detail,
+			Location: convertPythonLocation(parsedLine.Location),
+			Path:     path,
+		}
+
+		diags = diags.Append(diag)
+	}
+
+	return diags, nil
+}
+
+func convertPythonPath(path string) (dyn.Path, error) {
+	if path == "" {
+		return nil, nil
+	}
+
+	return dyn.NewPathFromString(path)
+}
+
+func convertPythonSeverity(severity pythonSeverity) (diag.Severity, error) {
+	switch severity {
+	case pythonError:
+		return diag.Error, nil
+	case pythonWarning:
+		return diag.Warning, nil
+	default:
+		return 0, fmt.Errorf("unexpected value: %s", severity)
+	}
+}
+
+func convertPythonLocation(location pythonDiagnosticLocation) dyn.Location {
+	return dyn.Location{
+		File:   location.File,
+		Line:   location.Line,
+		Column: location.Column,
+	}
+}
--- a/bundle/config/mutator/python/python_diagnostics_test.go
+++ b/bundle/config/mutator/python/python_diagnostics_test.go
@ -0,0 +1,107 @@
+package python
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/databricks/cli/libs/diag"
+	"github.com/databricks/cli/libs/dyn"
+	assert "github.com/databricks/cli/libs/dyn/dynassert"
+)
+
+func TestConvertPythonLocation(t *testing.T) {
+	location := convertPythonLocation(pythonDiagnosticLocation{
+		File:   "src/examples/file.py",
+		Line:   1,
+		Column: 2,
+	})
+
+	assert.Equal(t, dyn.Location{
+		File:   "src/examples/file.py",
+		Line:   1,
+		Column: 2,
+	}, location)
+}
+
+type parsePythonDiagnosticsTest struct {
+	name     string
+	input    string
+	expected diag.Diagnostics
+}
+
+func TestParsePythonDiagnostics(t *testing.T) {
+
+	testCases := []parsePythonDiagnosticsTest{
+		{
+			name:  "short error with location",
+			input: `{"severity": "error", "summary": "error summary", "location": {"file": "src/examples/file.py", "line": 1, "column": 2}}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary",
+					Location: dyn.Location{
+						File:   "src/examples/file.py",
+						Line:   1,
+						Column: 2,
+					},
+				},
+			},
+		},
+		{
+			name:  "short error with path",
+			input: `{"severity": "error", "summary": "error summary", "path": "resources.jobs.job0.name"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary",
+					Path:     dyn.MustPathFromString("resources.jobs.job0.name"),
+				},
+			},
+		},
+		{
+			name:     "empty file",
+			input:    "",
+			expected: diag.Diagnostics{},
+		},
+		{
+			name:     "newline file",
+			input:    "\n",
+			expected: diag.Diagnostics{},
+		},
+		{
+			name:  "warning with detail",
+			input: `{"severity": "warning", "summary": "warning summary", "detail": "warning detail"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Warning,
+					Summary:  "warning summary",
+					Detail:   "warning detail",
+				},
+			},
+		},
+		{
+			name: "multiple errors",
+			input: `{"severity": "error", "summary": "error summary (1)"}` + "\n" +
+				`{"severity": "error", "summary": "error summary (2)"}`,
+			expected: diag.Diagnostics{
+				{
+					Severity: diag.Error,
+					Summary:  "error summary (1)",
+				},
+				{
+					Severity: diag.Error,
+					Summary:  "error summary (2)",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			diags, err := parsePythonDiagnostics(bytes.NewReader([]byte(tc.input)))
+
+			assert.NoError(t, err)
+			assert.Equal(t, tc.expected, diags)
+		})
+	}
+}
--- a/bundle/config/mutator/python/python_mutator.go
+++ b/bundle/config/mutator/python/python_mutator.go
@ -3,11 +3,14 @@ package python
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"runtime"

+	"github.com/databricks/databricks-sdk-go/logger"
+
 	"github.com/databricks/cli/bundle/env"

 	"github.com/databricks/cli/bundle"
@ -87,6 +90,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 		return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set")
 	}

+	// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics'
+	var mutateDiags diag.Diagnostics
+	var mutateDiagsHasError = errors.New("unexpected error")
+
 	err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) {
 		pythonPath := interpreterPath(experimental.PyDABs.VEnvPath)

@ -103,9 +110,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 			return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
 		}

-		rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
-		if err != nil {
-			return dyn.InvalidValue, err
+		rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
+		mutateDiags = diags
+		if diags.HasError() {
+			return dyn.InvalidValue, mutateDiagsHasError
 		}

 		visitor, err := createOverrideVisitor(ctx, m.phase)
@ -116,7 +124,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
 		return merge.Override(leftRoot, rightRoot, visitor)
 	})

-	return diag.FromErr(err)
+	if err == mutateDiagsHasError {
+		if !mutateDiags.HasError() {
+			panic("mutateDiags has no error, but error is expected")
+		}
+
+		return mutateDiags
+	}
+
+	return mutateDiags.Extend(diag.FromErr(err))
 }

 func createCacheDir(ctx context.Context) (string, error) {
@ -138,9 +154,10 @@ func createCacheDir(ctx context.Context) (string, error) {
 	return os.MkdirTemp("", "-pydabs")
 }

-func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) {
+func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) {
 	inputPath := filepath.Join(cacheDir, "input.json")
 	outputPath := filepath.Join(cacheDir, "output.json")
+	diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")

 	args := []string{
 		pythonPath,
@ -152,42 +169,77 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r
 		inputPath,
 		"--output",
 		outputPath,
+		"--diagnostics",
+		diagnosticsPath,
 	}

-	// we need to marshal dyn.Value instead of bundle.Config to JSON to support
-	// non-string fields assigned with bundle variables
-	rootConfigJson, err := json.Marshal(root.AsAny())
-	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err)
-	}
-
-	err = os.WriteFile(inputPath, rootConfigJson, 0600)
-	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err)
+	if err := writeInputFile(inputPath, root); err != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
 	}

 	stderrWriter := newLogWriter(ctx, "stderr: ")
 	stdoutWriter := newLogWriter(ctx, "stdout: ")

-	_, err = process.Background(
+	_, processErr := process.Background(
 		ctx,
 		args,
 		process.WithDir(rootPath),
 		process.WithStderrWriter(stderrWriter),
 		process.WithStdoutWriter(stdoutWriter),
 	)
-	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err)
+	if processErr != nil {
+		logger.Debugf(ctx, "python mutator process failed: %s", processErr)
 	}

+	pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath)
+	if pythonDiagnosticsErr != nil {
+		logger.Debugf(ctx, "failed to load diagnostics: %s", pythonDiagnosticsErr)
+	}
+
+	// if diagnostics file exists, it gives the most descriptive errors
+	// if there is any error, we treat it as fatal error, and stop processing
+	if pythonDiagnostics.HasError() {
+		return dyn.InvalidValue, pythonDiagnostics
+	}
+
+	// process can fail without reporting errors in diagnostics file or creating it, for instance,
+	// venv doesn't have PyDABs library installed
+	if processErr != nil {
+		return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr)
+	}
+
+	// or we can fail to read diagnostics file, that should always be created
+	if pythonDiagnosticsErr != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
+	}
+
+	output, err := loadOutputFile(rootPath, outputPath)
+	if err != nil {
+		return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err)
+	}
+
+	// we pass through pythonDiagnostic because it contains warnings
+	return output, pythonDiagnostics
+}
+
+func writeInputFile(inputPath string, input dyn.Value) error {
+	// we need to marshal dyn.Value instead of bundle.Config to JSON to support
+	// non-string fields assigned with bundle variables
+	rootConfigJson, err := json.Marshal(input.AsAny())
+	if err != nil {
+		return fmt.Errorf("failed to marshal input: %w", err)
+	}
+
+	return os.WriteFile(inputPath, rootConfigJson, 0600)
+}
+
+func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) {
 	outputFile, err := os.Open(outputPath)
 	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err)
+		return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err)
 	}

-	defer func() {
-		_ = outputFile.Close()
-	}()
+	defer outputFile.Close()

 	// we need absolute path because later parts of pipeline assume all paths are absolute
 	// and this file will be used as location to resolve relative paths.
@ -204,24 +256,43 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r

 	generated, err := yamlloader.LoadYAML(virtualPath, outputFile)
 	if err != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err)
+		return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err)
 	}

 	normalized, diagnostic := convert.Normalize(config.Root{}, generated)
 	if diagnostic.Error() != nil {
-		return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %w", diagnostic.Error())
+		return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error())
 	}

 	// warnings shouldn't happen because output should be already normalized
 	// when it happens, it's a bug in the mutator, and should be treated as an error

 	for _, d := range diagnostic.Filter(diag.Warning) {
-		return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %s", d.Summary)
+		return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary)
 	}

 	return normalized, nil
 }

+// loadDiagnosticsFile loads diagnostics from a file.
+//
+// It contains a list of warnings and errors that we should print to users.
+//
+// If the file doesn't exist, we return an error. We expect the file to always be
+// created by the Python mutator, and it's absence means there are integration problems,
+// and the diagnostics file was lost. If we treat non-existence as an empty diag.Diagnostics
+// we risk loosing errors and warnings.
+func loadDiagnosticsFile(path string) (diag.Diagnostics, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to open diagnostics file: %w", err)
+	}
+
+	defer file.Close()
+
+	return parsePythonDiagnostics(file)
+}
+
 func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) {
 	switch phase {
 	case PythonMutatorPhaseLoad:
--- a/bundle/config/mutator/python/python_mutator_test.go
+++ b/bundle/config/mutator/python/python_mutator_test.go
@ -74,12 +74,14 @@ func TestPythonMutator_load(t *testing.T) {
 					},
 				}
 			}
-		}`)
+		}`,
+		`{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`,
+	)

 	mutator := PythonMutator(PythonMutatorPhaseLoad)
-	diag := bundle.Apply(ctx, b, mutator)
+	diags := bundle.Apply(ctx, b, mutator)

-	assert.NoError(t, diag.Error())
+	assert.NoError(t, diags.Error())

 	assert.ElementsMatch(t, []string{"job0", "job1"}, maps.Keys(b.Config.Resources.Jobs))

@ -90,6 +92,14 @@ func TestPythonMutator_load(t *testing.T) {
 	if job1, ok := b.Config.Resources.Jobs["job1"]; ok {
 		assert.Equal(t, "job_1", job1.Name)
 	}
+
+	assert.Equal(t, 1, len(diags))
+	assert.Equal(t, "job doesn't have any tasks", diags[0].Summary)
+	assert.Equal(t, dyn.Location{
+		File:   "src/examples/file.py",
+		Line:   10,
+		Column: 5,
+	}, diags[0].Location)
 }

 func TestPythonMutator_load_disallowed(t *testing.T) {
@ -129,7 +139,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) {
 					}
 				}
 			}
-		}`)
+		}`, "")

 	mutator := PythonMutator(PythonMutatorPhaseLoad)
 	diag := bundle.Apply(ctx, b, mutator)
@ -174,7 +184,7 @@ func TestPythonMutator_init(t *testing.T) {
 					}
 				}
 			}
-		}`)
+		}`, "")

 	mutator := PythonMutator(PythonMutatorPhaseInit)
 	diag := bundle.Apply(ctx, b, mutator)
@ -235,12 +245,12 @@ func TestPythonMutator_badOutput(t *testing.T) {
 					}
 				}
 			}
-		}`)
+		}`, "")

 	mutator := PythonMutator(PythonMutatorPhaseLoad)
 	diag := bundle.Apply(ctx, b, mutator)

-	assert.EqualError(t, diag.Error(), "failed to normalize Python mutator output: unknown field: unknown_property")
+	assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property")
 }

 func TestPythonMutator_disabled(t *testing.T) {
@ -409,6 +419,13 @@ func TestCreateOverrideVisitor(t *testing.T) {
 	}
 }

+func TestLoadDiagnosticsFile_nonExistent(t *testing.T) {
+	// this is an important behaviour, see loadDiagnosticsFile docstring
+	_, err := loadDiagnosticsFile("non_existent_file.json")
+
+	assert.Error(t, err)
+}
+
 func TestInterpreterPath(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		assert.Equal(t, "venv\\Scripts\\python3.exe", interpreterPath("venv"))
@ -417,7 +434,7 @@ func TestInterpreterPath(t *testing.T) {
 	}
 }

-func withProcessStub(t *testing.T, args []string, stdout string) context.Context {
+func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context {
 	ctx := context.Background()
 	ctx, stub := process.WithStub(ctx)

@ -429,17 +446,24 @@ func withProcessStub(t *testing.T, args []string, stdout string) context.Context

 	inputPath := filepath.Join(cacheDir, "input.json")
 	outputPath := filepath.Join(cacheDir, "output.json")
+	diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json")

 	args = append(args, "--input", inputPath)
 	args = append(args, "--output", outputPath)
+	args = append(args, "--diagnostics", diagnosticsPath)

 	stub.WithCallback(func(actual *exec.Cmd) error {
 		_, err := os.Stat(inputPath)
 		assert.NoError(t, err)

 		if reflect.DeepEqual(actual.Args, args) {
-			err := os.WriteFile(outputPath, []byte(stdout), 0600)
-			return err
+			err := os.WriteFile(outputPath, []byte(output), 0600)
+			require.NoError(t, err)
+
+			err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600)
+			require.NoError(t, err)
+
+			return nil
 		} else {
 			return fmt.Errorf("unexpected command: %v", actual.Args)
 		}