2024-06-20 08:43:08 +00:00
|
|
|
package python
|
|
|
|
|
|
|
|
import (
|
2024-09-02 09:49:30 +00:00
|
|
|
"bytes"
|
2024-06-20 08:43:08 +00:00
|
|
|
"context"
|
|
|
|
"encoding/json"
|
2024-07-02 15:10:53 +00:00
|
|
|
"errors"
|
2024-06-20 08:43:08 +00:00
|
|
|
"fmt"
|
2024-09-02 09:49:30 +00:00
|
|
|
"io"
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
"io/fs"
|
2024-06-20 08:43:08 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2025-01-08 09:29:45 +00:00
|
|
|
"reflect"
|
2024-09-02 09:49:30 +00:00
|
|
|
"strings"
|
2024-06-20 08:43:08 +00:00
|
|
|
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
"github.com/databricks/cli/bundle/config/mutator/paths"
|
|
|
|
|
2024-07-02 15:10:53 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/logger"
|
2024-09-02 09:49:30 +00:00
|
|
|
"github.com/fatih/color"
|
|
|
|
|
|
|
|
"github.com/databricks/cli/libs/python"
|
2024-07-02 15:10:53 +00:00
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
"github.com/databricks/cli/bundle/env"
|
|
|
|
|
2024-06-20 08:43:08 +00:00
|
|
|
"github.com/databricks/cli/bundle"
|
|
|
|
"github.com/databricks/cli/bundle/config"
|
|
|
|
"github.com/databricks/cli/libs/diag"
|
|
|
|
"github.com/databricks/cli/libs/dyn"
|
|
|
|
"github.com/databricks/cli/libs/dyn/convert"
|
|
|
|
"github.com/databricks/cli/libs/dyn/merge"
|
|
|
|
"github.com/databricks/cli/libs/dyn/yamlloader"
|
|
|
|
"github.com/databricks/cli/libs/log"
|
|
|
|
"github.com/databricks/cli/libs/process"
|
|
|
|
)
|
|
|
|
|
|
|
|
// phase identifies the stage of the bundle mutator pipeline at which
// the Python mutator is executed (see the constants below).
type phase string
|
|
|
|
|
|
|
|
const (
	// PythonMutatorPhaseLoad is the phase in which bundle configuration is loaded.
	//
	// At this stage, PyDABs adds statically defined resources to the bundle configuration.
	// Which resources are added should be deterministic and not depend on the bundle configuration.
	//
	// We also open for possibility of appending other sections of bundle configuration,
	// for example, adding new variables. However, this is not supported yet, and CLI rejects
	// such changes.
	//
	// Deprecated, left for backward-compatibility with PyDABs.
	PythonMutatorPhaseLoad phase = "load"

	// PythonMutatorPhaseInit is the phase after bundle configuration was loaded, and
	// the list of statically declared resources is known.
	//
	// At this stage, PyDABs adds resources defined using generators, or mutates existing resources,
	// including the ones defined using YAML.
	//
	// During this process, within generator and mutators, PyDABs can access:
	// - selected deployment target
	// - bundle variables values
	// - variables provided through CLI arguments or environment variables
	//
	// The following is not available:
	// - variables referencing other variables are in unresolved format
	//
	// PyDABs can output YAML containing references to variables, and CLI should resolve them.
	//
	// Existing resources can't be removed, and CLI rejects such changes.
	//
	// Deprecated, left for backward-compatibility with PyDABs.
	PythonMutatorPhaseInit phase = "init"

	// PythonMutatorPhaseLoadResources is the phase in which YAML configuration was loaded.
	//
	// At this stage, we execute Python code to load resources defined in Python.
	//
	// During this process, Python code can access:
	// - selected deployment target
	// - bundle variable values
	// - variables provided through CLI argument or environment variables
	//
	// The following is not available:
	// - variables referencing other variables are in unresolved format
	//
	// Python code can output YAML referencing variables, and CLI should resolve them.
	//
	// Existing resources can't be removed or modified, and CLI rejects such changes.
	// While it's called 'load_resources', this phase is executed in 'init' phase of mutator pipeline.
	PythonMutatorPhaseLoadResources phase = "load_resources"

	// PythonMutatorPhaseApplyMutators is the phase in which resources defined in YAML or Python
	// are already loaded.
	//
	// At this stage, we execute Python code to mutate resources defined in YAML or Python.
	//
	// During this process, Python code can access:
	// - selected deployment target
	// - bundle variable values
	// - variables provided through CLI argument or environment variables
	//
	// The following is not available:
	// - variables referencing other variables are in unresolved format
	//
	// Python code can output YAML referencing variables, and CLI should resolve them.
	//
	// Resources can't be added or removed, and CLI rejects such changes. Python code is
	// allowed to modify existing resources, but not other parts of bundle configuration.
	PythonMutatorPhaseApplyMutators phase = "apply_mutators"
)
|
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
// pythonMutator is a bundle.Mutator that delegates loading or mutating
// bundle configuration to an external Python process at the given phase.
type pythonMutator struct {
	// phase selects which stage of the pipeline this instance runs
	// (load/init for deprecated PyDABs, load_resources/apply_mutators for Python).
	phase phase
}
|
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
func PythonMutator(phase phase) bundle.Mutator {
|
|
|
|
return &pythonMutator{
|
2024-06-20 08:43:08 +00:00
|
|
|
phase: phase,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
func (m *pythonMutator) Name() string {
|
|
|
|
return fmt.Sprintf("PythonMutator(%s)", m.phase)
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
// opts is a common structure for deprecated PyDABs and upcoming Python
// configuration sections
type opts struct {
	// enabled reports whether the Python mutator should run for the current phase.
	enabled bool

	// venvPath is the virtual environment to resolve the Python interpreter from;
	// empty means the interpreter is looked up on PATH.
	venvPath string

	// loadLocations requests that the Python process emit "locations.json"
	// mapping config paths to Python source locations (not supported by PyDABs).
	loadLocations bool
}
|
|
|
|
|
|
|
|
// runPythonMutatorOpts carries per-invocation parameters for runPythonMutator.
type runPythonMutatorOpts struct {
	// cacheDir holds the input/output/diagnostics/locations files exchanged
	// with the Python subprocess.
	cacheDir string

	// bundleRootPath is the working directory for the Python subprocess.
	bundleRootPath string

	// pythonPath is the resolved Python interpreter executable to run.
	pythonPath string

	// loadLocations enables passing "--locations" so the subprocess writes
	// source locations for the generated configuration.
	loadLocations bool
}
|
|
|
|
|
|
|
|
// getOpts adapts deprecated PyDABs and upcoming Python configuration
|
|
|
|
// into a common structure.
|
|
|
|
func getOpts(b *bundle.Bundle, phase phase) (opts, error) {
|
|
|
|
experimental := b.Config.Experimental
|
|
|
|
if experimental == nil {
|
|
|
|
return opts{}, nil
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
// using reflect.DeepEquals in case we add more fields
|
|
|
|
pydabsEnabled := !reflect.DeepEqual(experimental.PyDABs, config.PyDABs{})
|
|
|
|
pythonEnabled := !reflect.DeepEqual(experimental.Python, config.Python{})
|
|
|
|
|
|
|
|
if pydabsEnabled && pythonEnabled {
|
|
|
|
return opts{}, errors.New("both experimental/pydabs and experimental/python are enabled, only one can be enabled")
|
|
|
|
} else if pydabsEnabled {
|
|
|
|
if !experimental.PyDABs.Enabled {
|
|
|
|
return opts{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// don't execute for phases for 'python' section
|
|
|
|
if phase == PythonMutatorPhaseInit || phase == PythonMutatorPhaseLoad {
|
|
|
|
return opts{
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
enabled: true,
|
|
|
|
venvPath: experimental.PyDABs.VEnvPath,
|
|
|
|
loadLocations: false, // not supported in PyDABs
|
2025-01-08 09:29:45 +00:00
|
|
|
}, nil
|
|
|
|
} else {
|
|
|
|
return opts{}, nil
|
|
|
|
}
|
|
|
|
} else if pythonEnabled {
|
|
|
|
// don't execute for phases for 'pydabs' section
|
|
|
|
if phase == PythonMutatorPhaseLoadResources || phase == PythonMutatorPhaseApplyMutators {
|
|
|
|
return opts{
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
enabled: true,
|
|
|
|
venvPath: experimental.Python.VEnvPath,
|
|
|
|
loadLocations: true,
|
2025-01-08 09:29:45 +00:00
|
|
|
}, nil
|
|
|
|
} else {
|
|
|
|
return opts{}, nil
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return opts{}, nil
|
|
|
|
}
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
// Apply runs the Python subprocess for the configured phase and merges its
// output into the bundle configuration. It is a no-op when the relevant
// experimental section does not enable this phase.
func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
	opts, err := getOpts(b, m.phase)
	if err != nil {
		return diag.Errorf("failed to apply python mutator: %s", err)
	}

	if !opts.enabled {
		return nil
	}

	// mutateDiags is used because Mutate returns 'error' instead of 'diag.Diagnostics'
	var mutateDiags diag.Diagnostics
	// sentinel error returned from the closure below so the code after Mutate
	// can distinguish "runPythonMutator produced diagnostics" from other errors
	mutateDiagsHasError := errors.New("unexpected error")

	err = b.Config.Mutate(func(leftRoot dyn.Value) (dyn.Value, error) {
		pythonPath, err := detectExecutable(ctx, opts.venvPath)
		if err != nil {
			return dyn.InvalidValue, fmt.Errorf("failed to get Python interpreter path: %w", err)
		}

		cacheDir, err := createCacheDir(ctx)
		if err != nil {
			return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
		}

		rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
			cacheDir:       cacheDir,
			bundleRootPath: b.BundleRootPath,
			pythonPath:     pythonPath,
			loadLocations:  opts.loadLocations,
		})
		// captured for use after Mutate returns; diags may contain warnings
		// even on the success path
		mutateDiags = diags
		if diags.HasError() {
			return dyn.InvalidValue, mutateDiagsHasError
		}

		// the visitor enforces per-phase rules about which config changes
		// the Python process is allowed to make
		visitor, err := createOverrideVisitor(ctx, m.phase)
		if err != nil {
			return dyn.InvalidValue, err
		}

		return merge.Override(leftRoot, rightRoot, visitor)
	})

	// identity comparison is intentional: mutateDiagsHasError is a local
	// sentinel that can only come from the closure above
	if err == mutateDiagsHasError {
		if !mutateDiags.HasError() {
			panic("mutateDiags has no error, but error is expected")
		}

		return mutateDiags
	}

	// err is nil on success; FromErr(nil) adds nothing, so warnings in
	// mutateDiags are still propagated
	return mutateDiags.Extend(diag.FromErr(err))
}
|
|
|
|
|
2024-06-24 07:47:41 +00:00
|
|
|
func createCacheDir(ctx context.Context) (string, error) {
|
|
|
|
// b.CacheDir doesn't work because target isn't yet selected
|
|
|
|
|
|
|
|
// support the same env variable as in b.CacheDir
|
|
|
|
if tempDir, exists := env.TempDir(ctx); exists {
|
|
|
|
// use 'default' as target name
|
2025-01-08 09:29:45 +00:00
|
|
|
cacheDir := filepath.Join(tempDir, "default", "python")
|
2024-06-24 07:47:41 +00:00
|
|
|
|
|
|
|
err := os.MkdirAll(cacheDir, 0o700)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return cacheDir, nil
|
|
|
|
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
return os.MkdirTemp("", "-python")
|
2024-06-24 07:47:41 +00:00
|
|
|
}
|
|
|
|
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
// runPythonMutator executes 'databricks.bundles.build' in a subprocess and
// returns the configuration it produced, merged with source locations when
// requested. The exchange happens through JSON files in opts.cacheDir.
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {
	inputPath := filepath.Join(opts.cacheDir, "input.json")
	outputPath := filepath.Join(opts.cacheDir, "output.json")
	diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json")
	locationsPath := filepath.Join(opts.cacheDir, "locations.json")

	args := []string{
		opts.pythonPath,
		"-m",
		"databricks.bundles.build",
		"--phase",
		string(m.phase),
		"--input",
		inputPath,
		"--output",
		outputPath,
		"--diagnostics",
		diagnosticsPath,
	}

	if opts.loadLocations {
		args = append(args, "--locations", locationsPath)
	}

	if err := writeInputFile(inputPath, root); err != nil {
		return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err)
	}

	// stderr is both logged and captured, so it can be attached to the
	// diagnostic if the process fails
	stderrBuf := bytes.Buffer{}
	stderrWriter := io.MultiWriter(
		newLogWriter(ctx, "stderr: "),
		&stderrBuf,
	)
	stdoutWriter := newLogWriter(ctx, "stdout: ")

	_, processErr := process.Background(
		ctx,
		args,
		process.WithDir(opts.bundleRootPath),
		process.WithStderrWriter(stderrWriter),
		process.WithStdoutWriter(stdoutWriter),
	)
	// processErr is not returned yet: the diagnostics file (read below) gives
	// better error messages when it exists
	if processErr != nil {
		logger.Debugf(ctx, "python mutator process failed: %s", processErr)
	}

	pythonDiagnostics, pythonDiagnosticsErr := loadDiagnosticsFile(diagnosticsPath)
	if pythonDiagnosticsErr != nil {
		logger.Debugf(ctx, "failed to load diagnostics: %s", pythonDiagnosticsErr)
	}

	// if diagnostics file exists, it gives the most descriptive errors
	// if there is any error, we treat it as fatal error, and stop processing
	if pythonDiagnostics.HasError() {
		return dyn.InvalidValue, pythonDiagnostics
	}

	// process can fail without reporting errors in diagnostics file or creating it, for instance,
	// venv doesn't have 'databricks-bundles' library installed
	if processErr != nil {
		diagnostic := diag.Diagnostic{
			Severity: diag.Error,
			Summary:  fmt.Sprintf("python mutator process failed: %q, use --debug to enable logging", processErr),
			Detail:   explainProcessErr(stderrBuf.String()),
		}

		return dyn.InvalidValue, diag.Diagnostics{diagnostic}
	}

	// or we can fail to read diagnostics file, that should always be created
	if pythonDiagnosticsErr != nil {
		return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr)
	}

	locations, err := loadLocationsFile(locationsPath)
	if err != nil {
		return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err)
	}

	output, outputDiags := loadOutputFile(opts.bundleRootPath, outputPath, locations)
	pythonDiagnostics = pythonDiagnostics.Extend(outputDiags)

	// we pass through pythonDiagnostic because it contains warnings
	return output, pythonDiagnostics
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
// pythonInstallExplanation is appended to the error detail when the Python
// subprocess fails because 'databricks-bundles' can't be imported.
const pythonInstallExplanation = `Ensure that 'databricks-bundles' is installed in Python environment:

  $ .venv/bin/pip install databricks-bundles

If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml,
or activate the environment before running CLI commands:

  experimental:
    python:
      venv_path: .venv
`
|
|
|
|
|
|
|
|
// explainProcessErr provides additional explanation for common errors.
|
|
|
|
// It's meant to be the best effort, and not all errors are covered.
|
|
|
|
// Output should be used only used for error reporting.
|
|
|
|
func explainProcessErr(stderr string) string {
|
|
|
|
// implemented in cpython/Lib/runpy.py and portable across Python 3.x, including pypy
|
|
|
|
if strings.Contains(stderr, "Error while finding module specification for 'databricks.bundles.build'") {
|
2025-01-08 09:29:45 +00:00
|
|
|
summary := color.CyanString("Explanation: ") + "'databricks-bundles' library is not installed in the Python environment.\n"
|
2024-09-02 09:49:30 +00:00
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
return stderr + "\n" + summary + "\n" + pythonInstallExplanation
|
2024-09-02 09:49:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return stderr
|
|
|
|
}
|
|
|
|
|
2024-07-02 15:10:53 +00:00
|
|
|
func writeInputFile(inputPath string, input dyn.Value) error {
|
|
|
|
// we need to marshal dyn.Value instead of bundle.Config to JSON to support
|
|
|
|
// non-string fields assigned with bundle variables
|
|
|
|
rootConfigJson, err := json.Marshal(input.AsAny())
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to marshal input: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return os.WriteFile(inputPath, rootConfigJson, 0o600)
|
|
|
|
}
|
|
|
|
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
// loadLocationsFile loads locations.json containing source locations for generated YAML.
|
|
|
|
func loadLocationsFile(locationsPath string) (*pythonLocations, error) {
|
|
|
|
locationsFile, err := os.Open(locationsPath)
|
|
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
|
|
return newPythonLocations(), nil
|
|
|
|
} else if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to open locations file: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
defer locationsFile.Close()
|
|
|
|
|
|
|
|
return parsePythonLocations(locationsFile)
|
|
|
|
}
|
|
|
|
|
|
|
|
func loadOutputFile(rootPath, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
|
2024-06-24 07:47:41 +00:00
|
|
|
outputFile, err := os.Open(outputPath)
|
|
|
|
if err != nil {
|
2024-08-30 13:29:00 +00:00
|
|
|
return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err))
|
2024-06-24 07:47:41 +00:00
|
|
|
}
|
|
|
|
|
2024-07-02 15:10:53 +00:00
|
|
|
defer outputFile.Close()
|
2024-06-24 07:47:41 +00:00
|
|
|
|
PythonMutator: propagate source locations (#1783)
## Changes
Add a mechanism to load Python source locations in the Python mutator.
Previously, locations pointed to generated YAML. Now, they point to
Python sources instead. Python process outputs "locations.json"
containing locations of bundle paths, examples:
```json
{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
```
Such locations form a tree, and we assign locations of the closest
ancestor to each `dyn.Value` based on its path. For example,
`resources.jobs.job_0.tasks[0].task_key` is located at `job_0.py:10:5`
and `resources.jobs.job_0.tasks[0].email_notifications` is located at
`job_0.py:3:5`, because we use the location of the job as the most
precise approximation.
This feature is only enabled if `experimental/python` is used.
Note: for now, we don't update locations with relative paths, because it
has a side effect in changing how these paths are resolved
## Example
```
% databricks bundle validate
Warning: job_cluster_key abc is not defined
at resources.jobs.examples.tasks[0].job_cluster_key
in resources/example.py:10:1
```
## Tests
Unit tests and manually
2025-01-22 15:37:37 +00:00
|
|
|
return loadOutput(rootPath, outputFile, locations)
|
|
|
|
}
|
|
|
|
|
|
|
|
// loadOutput parses the YAML produced by the Python process, records job path
// locations into the provided index, re-attributes source locations from
// Python code to the parsed value, and strictly normalizes the result
// against config.Root.
func loadOutput(rootPath string, outputFile io.Reader, locations *pythonLocations) (dyn.Value, diag.Diagnostics) {
	// we need absolute path because later parts of pipeline assume all paths are absolute
	// and this file will be used as location to resolve relative paths.
	//
	// virtualPath has to stay in bundleRootPath, because locations outside root path are not allowed:
	//
	//	Error: path /var/folders/.../python/dist/*.whl is not contained in bundle root path
	//
	// for that, we pass virtualPath instead of outputPath as file location
	virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName))
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err))
	}

	generated, err := yamlloader.LoadYAML(virtualPath, outputFile)
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err))
	}

	// paths are resolved relative to locations of their values, if we change location
	// we have to update each path, until we simplify that, we don't update locations
	// for such values, so we don't change how paths are resolved
	//
	// we can remove this once we:
	// - add variable interpolation before and after PythonMutator
	// - implement path normalization (aka path normal form)
	_, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
		putPythonLocation(locations, p, v.Location())
		return v, nil
	})
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
	}

	// generated has dyn.Location as if it comes from generated YAML file
	// earlier we loaded locations.json with source locations in Python code
	generatedWithLocations, err := mergePythonLocations(generated, locations)
	if err != nil {
		return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err))
	}

	return strictNormalize(config.Root{}, generatedWithLocations)
}
|
|
|
|
|
|
|
|
func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) {
|
|
|
|
normalized, diags := convert.Normalize(dst, generated)
|
2024-06-20 08:43:08 +00:00
|
|
|
|
|
|
|
// warnings shouldn't happen because output should be already normalized
|
|
|
|
// when it happens, it's a bug in the mutator, and should be treated as an error
|
|
|
|
|
2024-08-30 13:29:00 +00:00
|
|
|
strictDiags := diag.Diagnostics{}
|
|
|
|
|
|
|
|
for _, d := range diags {
|
|
|
|
if d.Severity == diag.Warning {
|
|
|
|
d.Severity = diag.Error
|
|
|
|
}
|
|
|
|
|
|
|
|
strictDiags = strictDiags.Append(d)
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
|
|
|
|
2024-08-30 13:29:00 +00:00
|
|
|
return normalized, strictDiags
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
|
|
|
|
2024-07-02 15:10:53 +00:00
|
|
|
// loadDiagnosticsFile loads diagnostics from a file.
//
// It contains a list of warnings and errors that we should print to users.
//
// If the file doesn't exist, we return an error. We expect the file to always be
// created by the Python mutator, and its absence means there are integration problems,
// and the diagnostics file was lost. If we treat non-existence as an empty diag.Diagnostics
// we risk losing errors and warnings.
func loadDiagnosticsFile(path string) (diag.Diagnostics, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("failed to open diagnostics file: %w", err)
	}

	defer file.Close()

	return parsePythonDiagnostics(file)
}
|
|
|
|
|
2024-06-20 08:43:08 +00:00
|
|
|
func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) {
|
|
|
|
switch phase {
|
2024-06-24 07:47:41 +00:00
|
|
|
case PythonMutatorPhaseLoad:
|
2025-01-08 09:29:45 +00:00
|
|
|
return createLoadResourcesOverrideVisitor(ctx), nil
|
2024-06-24 07:47:41 +00:00
|
|
|
case PythonMutatorPhaseInit:
|
2025-01-08 09:29:45 +00:00
|
|
|
return createInitOverrideVisitor(ctx, insertResourceModeAllow), nil
|
|
|
|
case PythonMutatorPhaseLoadResources:
|
|
|
|
return createLoadResourcesOverrideVisitor(ctx), nil
|
|
|
|
case PythonMutatorPhaseApplyMutators:
|
|
|
|
return createInitOverrideVisitor(ctx, insertResourceModeDisallow), nil
|
2024-06-20 08:43:08 +00:00
|
|
|
default:
|
|
|
|
return merge.OverrideVisitor{}, fmt.Errorf("unknown phase: %s", phase)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
// createLoadResourcesOverrideVisitor creates an override visitor for the load_resources phase.
//
// During load_resources, it's only possible to create new resources, and not modify or
// delete existing ones.
func createLoadResourcesOverrideVisitor(ctx context.Context) merge.OverrideVisitor {
	resourcesPath := dyn.NewPath(dyn.Key("resources"))
	jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))

	return merge.OverrideVisitor{
		VisitDelete: func(valuePath dyn.Path, left dyn.Value) error {
			// empty values omitted by Python serialization are kept, not deleted
			if isOmitemptyDelete(left) {
				return merge.ErrOverrideUndoDelete
			}

			// any other deletion would remove existing config, which this phase forbids
			return fmt.Errorf("unexpected change at %q (delete)", valuePath.String())
		},
		VisitInsert: func(valuePath dyn.Path, right dyn.Value) (dyn.Value, error) {
			// insert 'resources' or 'resources.jobs' if it didn't exist before
			if valuePath.Equal(resourcesPath) || valuePath.Equal(jobsPath) {
				return right, nil
			}

			if !valuePath.HasPrefix(jobsPath) {
				return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
			}

			// a path of length len(jobsPath)+1, i.e. resources.jobs.<name>, inserts a whole resource
			insertResource := len(valuePath) == len(jobsPath)+1

			// adding a property into an existing resource is not allowed, because it changes it
			if !insertResource {
				return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
			}

			log.Debugf(ctx, "Insert value at %q", valuePath.String())

			return right, nil
		},
		VisitUpdate: func(valuePath dyn.Path, left, right dyn.Value) (dyn.Value, error) {
			// modifying existing values is never allowed during load_resources
			return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (update)", valuePath.String())
		},
	}
}
|
|
|
|
|
2025-01-08 09:29:45 +00:00
|
|
|
// insertResourceMode controls whether createInitOverrideVisitor allows or disallows inserting new resources.
type insertResourceMode int

const (
	// insertResourceModeDisallow rejects inserting new resources.
	insertResourceModeDisallow insertResourceMode = iota

	// insertResourceModeAllow permits inserting new resources.
	insertResourceModeAllow
)
|
|
|
|
|
2024-06-20 08:43:08 +00:00
|
|
|
// createInitOverrideVisitor creates an override visitor for the init phase.
//
// During the init phase it's possible to create new resources, modify existing
// resources, but not delete existing resources.
//
// If mode is insertResourceModeDisallow, it matches the expected behaviour of apply_mutators,
// where inserting new resources is not allowed.
func createInitOverrideVisitor(ctx context.Context, mode insertResourceMode) merge.OverrideVisitor {
	resourcesPath := dyn.NewPath(dyn.Key("resources"))
	jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs"))

	return merge.OverrideVisitor{
		VisitDelete: func(valuePath dyn.Path, left dyn.Value) error {
			// empty values omitted by Python serialization are kept, not deleted
			if isOmitemptyDelete(left) {
				return merge.ErrOverrideUndoDelete
			}

			if !valuePath.HasPrefix(jobsPath) {
				return fmt.Errorf("unexpected change at %q (delete)", valuePath.String())
			}

			// a path of length len(jobsPath)+1, i.e. resources.jobs.<name>, deletes a whole resource
			deleteResource := len(valuePath) == len(jobsPath)+1

			if deleteResource {
				return fmt.Errorf("unexpected change at %q (delete)", valuePath.String())
			}

			// deleting properties is allowed because it only changes an existing resource
			log.Debugf(ctx, "Delete value at %q", valuePath.String())

			return nil
		},
		VisitInsert: func(valuePath dyn.Path, right dyn.Value) (dyn.Value, error) {
			// insert 'resources' or 'resources.jobs' if it didn't exist before
			if valuePath.Equal(resourcesPath) || valuePath.Equal(jobsPath) {
				return right, nil
			}

			if !valuePath.HasPrefix(jobsPath) {
				return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
			}

			// a path of length len(jobsPath)+1, i.e. resources.jobs.<name>, inserts a whole resource
			insertResource := len(valuePath) == len(jobsPath)+1
			if mode == insertResourceModeDisallow && insertResource {
				return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (insert)", valuePath.String())
			}

			log.Debugf(ctx, "Insert value at %q", valuePath.String())

			return right, nil
		},
		VisitUpdate: func(valuePath dyn.Path, left, right dyn.Value) (dyn.Value, error) {
			// updates are only allowed within resources.jobs
			if !valuePath.HasPrefix(jobsPath) {
				return dyn.InvalidValue, fmt.Errorf("unexpected change at %q (update)", valuePath.String())
			}

			log.Debugf(ctx, "Update value at %q", valuePath.String())

			return right, nil
		},
	}
}
|
|
|
|
|
2024-07-03 07:22:03 +00:00
|
|
|
func isOmitemptyDelete(left dyn.Value) bool {
|
2025-01-08 09:29:45 +00:00
|
|
|
// Python output can omit empty sequences/mappings, because we don't track them as optional,
|
2024-07-03 07:22:03 +00:00
|
|
|
// there is no semantic difference between empty and missing, so we keep them as they were before
|
2025-01-08 09:29:45 +00:00
|
|
|
// Python mutator deleted them.
|
2024-07-03 07:22:03 +00:00
|
|
|
|
|
|
|
switch left.Kind() {
|
|
|
|
case dyn.KindMap:
|
|
|
|
return left.MustMap().Len() == 0
|
|
|
|
|
|
|
|
case dyn.KindSequence:
|
|
|
|
return len(left.MustSequence()) == 0
|
|
|
|
|
|
|
|
case dyn.KindNil:
|
|
|
|
// map/sequence can be nil, for instance, bad YAML like: `foo:<eof>`
|
|
|
|
return true
|
|
|
|
|
|
|
|
default:
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-08-20 13:26:57 +00:00
|
|
|
// detectExecutable lookups Python interpreter in virtual environment, or if not set, in PATH.
|
|
|
|
func detectExecutable(ctx context.Context, venvPath string) (string, error) {
|
|
|
|
if venvPath == "" {
|
|
|
|
interpreter, err := python.DetectExecutable(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
return interpreter, nil
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|
2024-08-20 13:26:57 +00:00
|
|
|
|
|
|
|
return python.DetectVEnvExecutable(venvPath)
|
2024-06-20 08:43:08 +00:00
|
|
|
}
|