From 5a0a6d73345539d2719e580a4634dd8ed9326079 Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Tue, 2 Jul 2024 17:10:53 +0200 Subject: [PATCH] PythonMutator: add diagnostics (#1531) ## Changes Allow PyDABs to report `dyn.Diagnostics` by writing to `diagnostics.json` supplied as an argument, similar to `input.json` and `output.json` Such errors are not yet properly printed in `databricks bundle validate`, which will be fixed in a follow-up PR. ## Tests Unit tests --- .../mutator/python/python_diagnostics.go | 97 ++++++++++++++ .../mutator/python/python_diagnostics_test.go | 107 ++++++++++++++++ .../config/mutator/python/python_mutator.go | 121 ++++++++++++++---- .../mutator/python/python_mutator_test.go | 44 +++++-- 4 files changed, 334 insertions(+), 35 deletions(-) create mode 100644 bundle/config/mutator/python/python_diagnostics.go create mode 100644 bundle/config/mutator/python/python_diagnostics_test.go diff --git a/bundle/config/mutator/python/python_diagnostics.go b/bundle/config/mutator/python/python_diagnostics.go new file mode 100644 index 00000000..b8efc9ef --- /dev/null +++ b/bundle/config/mutator/python/python_diagnostics.go @@ -0,0 +1,97 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type pythonDiagnostic struct { + Severity pythonSeverity `json:"severity"` + Summary string `json:"summary"` + Detail string `json:"detail,omitempty"` + Location pythonDiagnosticLocation `json:"location,omitempty"` + Path string `json:"path,omitempty"` +} + +type pythonDiagnosticLocation struct { + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +type pythonSeverity = string + +const ( + pythonError pythonSeverity = "error" + pythonWarning pythonSeverity = "warning" +) + +// parsePythonDiagnostics parses diagnostics from the Python mutator. +// +// diagnostics file is newline-separated JSON objects with pythonDiagnostic structure. +func parsePythonDiagnostics(input io.Reader) (diag.Diagnostics, error) { + diags := diag.Diagnostics{} + decoder := json.NewDecoder(input) + + for decoder.More() { + var parsedLine pythonDiagnostic + + err := decoder.Decode(&parsedLine) + if err != nil { + return nil, fmt.Errorf("failed to parse diags: %s", err) + } + + severity, err := convertPythonSeverity(parsedLine.Severity) + if err != nil { + return nil, fmt.Errorf("failed to parse severity: %s", err) + } + + path, err := convertPythonPath(parsedLine.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse path: %s", err) + } + + diag := diag.Diagnostic{ + Severity: severity, + Summary: parsedLine.Summary, + Detail: parsedLine.Detail, + Location: convertPythonLocation(parsedLine.Location), + Path: path, + } + + diags = diags.Append(diag) + } + + return diags, nil +} + +func convertPythonPath(path string) (dyn.Path, error) { + if path == "" { + return nil, nil + } + + return dyn.NewPathFromString(path) +} + +func convertPythonSeverity(severity pythonSeverity) (diag.Severity, error) { + switch severity { + case pythonError: + return diag.Error, nil + case pythonWarning: + return diag.Warning, nil + default: + return 0, fmt.Errorf("unexpected value: %s", severity) + } +} + +func convertPythonLocation(location pythonDiagnosticLocation) dyn.Location { + return dyn.Location{ + File: location.File, + Line: location.Line, + Column: location.Column, + } +} diff --git a/bundle/config/mutator/python/python_diagnostics_test.go b/bundle/config/mutator/python/python_diagnostics_test.go new file mode 100644 index 00000000..7b66e253 --- /dev/null +++ b/bundle/config/mutator/python/python_diagnostics_test.go @@ -0,0 +1,107 @@ +package python + +import ( + "bytes" + "testing" + + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestConvertPythonLocation(t *testing.T) { + location := convertPythonLocation(pythonDiagnosticLocation{ + File: "src/examples/file.py", + Line: 1, + Column: 2, + }) + + assert.Equal(t, dyn.Location{ + File: "src/examples/file.py", + Line: 1, + Column: 2, + }, location) +} + +type parsePythonDiagnosticsTest struct { + name string + input string + expected diag.Diagnostics +} + +func TestParsePythonDiagnostics(t *testing.T) { + + testCases := []parsePythonDiagnosticsTest{ + { + name: "short error with location", + input: `{"severity": "error", "summary": "error summary", "location": {"file": "src/examples/file.py", "line": 1, "column": 2}}`, + expected: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "error summary", + Location: dyn.Location{ + File: "src/examples/file.py", + Line: 1, + Column: 2, + }, + }, + }, + }, + { + name: "short error with path", + input: `{"severity": "error", "summary": "error summary", "path": "resources.jobs.job0.name"}`, + expected: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "error summary", + Path: dyn.MustPathFromString("resources.jobs.job0.name"), + }, + }, + }, + { + name: "empty file", + input: "", + expected: diag.Diagnostics{}, + }, + { + name: "newline file", + input: "\n", + expected: diag.Diagnostics{}, + }, + { + name: "warning with detail", + input: `{"severity": "warning", "summary": "warning summary", "detail": "warning detail"}`, + expected: diag.Diagnostics{ + { + Severity: diag.Warning, + Summary: "warning summary", + Detail: "warning detail", + }, + }, + }, + { + name: "multiple errors", + input: `{"severity": "error", "summary": "error summary (1)"}` + "\n" + + `{"severity": "error", "summary": "error summary (2)"}`, + expected: diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "error summary (1)", + }, + { + Severity: diag.Error, + Summary: "error summary (2)", + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diags, err := parsePythonDiagnostics(bytes.NewReader([]byte(tc.input))) + + assert.NoError(t, err) + assert.Equal(t, tc.expected, diags) + }) + } +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index 73ddf952..bef69d9c 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -3,11 +3,14 @@ package python import ( "context" "encoding/json" + "errors" "fmt" "os" "path/filepath" "runtime" + "github.com/databricks/databricks-sdk-go/logger" + "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle" @@ -87,6 +90,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return diag.Errorf("\"experimental.pydabs.enabled\" can only be used when \"experimental.pydabs.venv_path\" is set") } + // mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics' + var mutateDiags diag.Diagnostics + var mutateDiagsHasError = errors.New("unexpected error") + err := b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) { pythonPath := interpreterPath(experimental.PyDABs.VEnvPath) @@ -103,9 +110,10 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) - if err != nil { - return dyn.InvalidValue, err + rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) + mutateDiags = diags + if diags.HasError() { + return dyn.InvalidValue, mutateDiagsHasError } visitor, err := createOverrideVisitor(ctx, m.phase) @@ -116,7 +124,15 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return merge.Override(leftRoot, rightRoot, visitor) }) - return diag.FromErr(err) + if err == mutateDiagsHasError { + if !mutateDiags.HasError() { + panic("mutateDiags has no error, but error is expected") + } + + return mutateDiags + } + + return mutateDiags.Extend(diag.FromErr(err)) } func createCacheDir(ctx context.Context) (string, error) { @@ -138,9 +154,10 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-pydabs") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) { +func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") args := []string{ pythonPath, @@ -152,42 +169,77 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r inputPath, "--output", outputPath, + "--diagnostics", + diagnosticsPath, } - // we need to marshal dyn.Value instead of bundle.Config to JSON to support - // non-string fields assigned with bundle variables - rootConfigJson, err := json.Marshal(root.AsAny()) - if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err) - } - - err = os.WriteFile(inputPath, rootConfigJson, 0600) - if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err) + if err := writeInputFile(inputPath, root); err != nil { + return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } stderrWriter := newLogWriter(ctx, "stderr: ") stdoutWriter := newLogWriter(ctx, "stdout: ") - _, err = process.Background( + _, processErr := process.Background( ctx, args, process.WithDir(rootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) - if err != nil { - return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err) + if processErr != nil { + logger.Debugf(ctx, "python mutator process failed: %s", processErr) } + pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath) + if pythonDiagnosticsErr != nil { + logger.Debugf(ctx, "failed to load diagnostics: %s", pythonDiagnosticsErr) + } + + // if diagnostics file exists, it gives the most descriptive errors + // if there is any error, we treat it as fatal error, and stop processing + if pythonDiagnostics.HasError() { + return dyn.InvalidValue, pythonDiagnostics + } + + // process can fail without reporting errors in diagnostics file or creating it, for instance, + // venv doesn't have PyDABs library installed + if processErr != nil { + return dyn.InvalidValue, diag.Errorf("python mutator process failed: %sw, use --debug to enable logging", processErr) + } + + // or we can fail to read diagnostics file, that should always be created + if pythonDiagnosticsErr != nil { + return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) + } + + output, err := loadOutputFile(rootPath, outputPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load Python mutator output: %s", err) + } + + // we pass through pythonDiagnostic because it contains warnings + return output, pythonDiagnostics +} + +func writeInputFile(inputPath string, input dyn.Value) error { + // we need to marshal dyn.Value instead of bundle.Config to JSON to support + // non-string fields assigned with bundle variables + rootConfigJson, err := json.Marshal(input.AsAny()) + if err != nil { + return fmt.Errorf("failed to marshal input: %w", err) + } + + return os.WriteFile(inputPath, rootConfigJson, 0600) +} + +func loadOutputFile(rootPath string, outputPath string) (dyn.Value, error) { outputFile, err := os.Open(outputPath) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err) + return dyn.InvalidValue, fmt.Errorf("failed to open output file: %w", err) } - defer func() { - _ = outputFile.Close() - }() + defer outputFile.Close() // we need absolute path because later parts of pipeline assume all paths are absolute // and this file will be used as location to resolve relative paths. @@ -204,24 +256,43 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r generated, err := yamlloader.LoadYAML(virtualPath, outputFile) if err != nil { - return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err) + return dyn.InvalidValue, fmt.Errorf("failed to parse output file: %w", err) } normalized, diagnostic := convert.Normalize(config.Root{}, generated) if diagnostic.Error() != nil { - return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %w", diagnostic.Error()) + return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %w", diagnostic.Error()) } // warnings shouldn't happen because output should be already normalized // when it happens, it's a bug in the mutator, and should be treated as an error for _, d := range diagnostic.Filter(diag.Warning) { - return dyn.InvalidValue, fmt.Errorf("failed to normalize Python mutator output: %s", d.Summary) + return dyn.InvalidValue, fmt.Errorf("failed to normalize output: %s", d.Summary) } return normalized, nil } +// loadDiagnosticsFile loads diagnostics from a file. +// +// It contains a list of warnings and errors that we should print to users. +// +// If the file doesn't exist, we return an error. We expect the file to always be +// created by the Python mutator, and it's absence means there are integration problems, +// and the diagnostics file was lost. If we treat non-existence as an empty diag.Diagnostics +// we risk loosing errors and warnings. +func loadDiagnosticsFile(path string) (diag.Diagnostics, error) { + file, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open diagnostics file: %w", err) + } + + defer file.Close() + + return parsePythonDiagnostics(file) +} + func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) { switch phase { case PythonMutatorPhaseLoad: diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index e2c20386..24e5ad60 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -74,12 +74,14 @@ func TestPythonMutator_load(t *testing.T) { }, } } - }`) + }`, + `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + ) mutator := PythonMutator(PythonMutatorPhaseLoad) - diag := bundle.Apply(ctx, b, mutator) + diags := bundle.Apply(ctx, b, mutator) - assert.NoError(t, diag.Error()) + assert.NoError(t, diags.Error()) assert.ElementsMatch(t, []string{"job0", "job1"}, maps.Keys(b.Config.Resources.Jobs)) @@ -90,6 +92,14 @@ func TestPythonMutator_load(t *testing.T) { if job1, ok := b.Config.Resources.Jobs["job1"]; ok { assert.Equal(t, "job_1", job1.Name) } + + assert.Equal(t, 1, len(diags)) + assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) + assert.Equal(t, dyn.Location{ + File: "src/examples/file.py", + Line: 10, + Column: 5, + }, diags[0].Location) } func TestPythonMutator_load_disallowed(t *testing.T) { @@ -129,7 +139,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) { } } } - }`) + }`, "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -174,7 +184,7 @@ func TestPythonMutator_init(t *testing.T) { } } } - }`) + }`, "") mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(ctx, b, mutator) @@ -235,12 +245,12 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`) + }`, "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) - assert.EqualError(t, diag.Error(), "failed to normalize Python mutator output: unknown field: unknown_property") + assert.EqualError(t, diag.Error(), "failed to load Python mutator output: failed to normalize output: unknown field: unknown_property") } func TestPythonMutator_disabled(t *testing.T) { @@ -409,6 +419,13 @@ func TestCreateOverrideVisitor(t *testing.T) { } } +func TestLoadDiagnosticsFile_nonExistent(t *testing.T) { + // this is an important behaviour, see loadDiagnosticsFile docstring + _, err := loadDiagnosticsFile("non_existent_file.json") + + assert.Error(t, err) +} + func TestInterpreterPath(t *testing.T) { if runtime.GOOS == "windows" { assert.Equal(t, "venv\\Scripts\\python3.exe", interpreterPath("venv")) @@ -417,7 +434,7 @@ func TestInterpreterPath(t *testing.T) { } } -func withProcessStub(t *testing.T, args []string, stdout string) context.Context { +func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -429,17 +446,24 @@ func withProcessStub(t *testing.T, args []string, stdout string) context.Context inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") args = append(args, "--input", inputPath) args = append(args, "--output", outputPath) + args = append(args, "--diagnostics", diagnosticsPath) stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(stdout), 0600) - return err + err := os.WriteFile(outputPath, []byte(output), 0600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) + require.NoError(t, err) + + return nil } else { return fmt.Errorf("unexpected command: %v", actual.Args) }