PythonMutator: replace stdin/stdout with files (#1512)

## Changes
Replace stdin/stdout with files in `PythonMutator`. Files are created in
a temporary directory.

Rename `ApplyPythonMutator` to `PythonMutator`.

Add test for `dyn.Location` behavior during the "load" stage.

## Tests
Unit tests
This commit is contained in:
Gleb Kanterov 2024-06-24 09:47:41 +02:00 committed by GitHub
parent 068c7cfc2d
commit 5ff06578ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 150 additions and 57 deletions

View File

@ -25,6 +25,6 @@ func DefaultMutators() []bundle.Mutator {
InitializeVariables(),
DefineDefaultTarget(),
LoadGitDetails(),
pythonmutator.ApplyPythonMutator(pythonmutator.ApplyPythonMutatorPhaseLoad),
pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoad),
}
}

View File

@ -1,7 +1,6 @@
package python
import (
"bytes"
"context"
"encoding/json"
"fmt"
@ -9,6 +8,8 @@ import (
"path/filepath"
"runtime"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
@ -23,7 +24,7 @@ import (
type phase string
const (
// ApplyPythonMutatorPhaseLoad is the phase in which bundle configuration is loaded.
// PythonMutatorPhaseLoad is the phase in which bundle configuration is loaded.
//
// At this stage, PyDABs adds statically defined resources to the bundle configuration.
// Which resources are added should be deterministic and not depend on the bundle configuration.
@ -31,9 +32,9 @@ const (
// We also open for possibility of appending other sections of bundle configuration,
// for example, adding new variables. However, this is not supported yet, and CLI rejects
// such changes.
ApplyPythonMutatorPhaseLoad phase = "load"
PythonMutatorPhaseLoad phase = "load"
// ApplyPythonMutatorPhaseInit is the phase after bundle configuration was loaded, and
// PythonMutatorPhaseInit is the phase after bundle configuration was loaded, and
// the list of statically declared resources is known.
//
// At this stage, PyDABs adds resources defined using generators, or mutates existing resources,
@ -50,21 +51,21 @@ const (
// PyDABs can output YAML containing references to variables, and CLI should resolve them.
//
// Existing resources can't be removed, and CLI rejects such changes.
ApplyPythonMutatorPhaseInit phase = "init"
PythonMutatorPhaseInit phase = "init"
)
type applyPythonMutator struct {
type pythonMutator struct {
phase phase
}
func ApplyPythonMutator(phase phase) bundle.Mutator {
return &applyPythonMutator{
func PythonMutator(phase phase) bundle.Mutator {
return &pythonMutator{
phase: phase,
}
}
func (m *applyPythonMutator) Name() string {
return fmt.Sprintf("ApplyPythonMutator(%s)", m.phase)
func (m *pythonMutator) Name() string {
return fmt.Sprintf("PythonMutator(%s)", m.phase)
}
func getExperimental(b *bundle.Bundle) config.Experimental {
@ -75,7 +76,7 @@ func getExperimental(b *bundle.Bundle) config.Experimental {
return *b.Config.Experimental
}
func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
experimental := getExperimental(b)
if !experimental.PyDABs.Enabled {
@ -97,7 +98,12 @@ func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D
}
}
rightRoot, err := m.runPythonMutator(ctx, b.RootPath, pythonPath, leftRoot)
cacheDir, err := createCacheDir(ctx)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
}
rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot)
if err != nil {
return dyn.InvalidValue, err
}
@ -113,13 +119,39 @@ func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D
return diag.FromErr(err)
}
func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) {
func createCacheDir(ctx context.Context) (string, error) {
// b.CacheDir doesn't work because target isn't yet selected
// support the same env variable as in b.CacheDir
if tempDir, exists := env.TempDir(ctx); exists {
// use 'default' as target name
cacheDir := filepath.Join(tempDir, "default", "pydabs")
err := os.MkdirAll(cacheDir, 0700)
if err != nil {
return "", err
}
return cacheDir, nil
}
return os.MkdirTemp("", "-pydabs")
}
func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) {
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
args := []string{
pythonPath,
"-m",
"databricks.bundles.build",
"--phase",
string(m.phase),
"--input",
inputPath,
"--output",
outputPath,
}
// we need to marshal dyn.Value instead of bundle.Config to JSON to support
@ -129,27 +161,48 @@ func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath stri
return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err)
}
logWriter := newLogWriter(ctx, "stderr: ")
err = os.WriteFile(inputPath, rootConfigJson, 0600)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err)
}
stdout, err := process.Background(
stderrWriter := newLogWriter(ctx, "stderr: ")
stdoutWriter := newLogWriter(ctx, "stdout: ")
_, err = process.Background(
ctx,
args,
process.WithDir(rootPath),
process.WithStderrWriter(logWriter),
process.WithStdinReader(bytes.NewBuffer(rootConfigJson)),
process.WithStderrWriter(stderrWriter),
process.WithStdoutWriter(stdoutWriter),
)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err)
}
// we need absolute path, or because later parts of pipeline assume all paths are absolute
// and this file will be used as location
outputFile, err := os.Open(outputPath)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err)
}
defer func() {
_ = outputFile.Close()
}()
// we need absolute path because later parts of pipeline assume all paths are absolute
// and this file will be used as location to resolve relative paths.
//
// virtualPath has to stay in rootPath, because locations outside root path are not allowed:
//
// Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path
//
// for that, we pass virtualPath instead of outputPath as file location
virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml"))
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to get absolute path: %w", err)
}
generated, err := yamlloader.LoadYAML(virtualPath, bytes.NewReader([]byte(stdout)))
generated, err := yamlloader.LoadYAML(virtualPath, outputFile)
if err != nil {
return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err)
}
@ -171,9 +224,9 @@ func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath stri
func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) {
switch phase {
case ApplyPythonMutatorPhaseLoad:
case PythonMutatorPhaseLoad:
return createLoadOverrideVisitor(ctx), nil
case ApplyPythonMutatorPhaseInit:
case PythonMutatorPhaseInit:
return createInitOverrideVisitor(ctx), nil
default:
return merge.OverrideVisitor{}, fmt.Errorf("unknown phase: %s", phase)

View File

@ -10,6 +10,9 @@ import (
"runtime"
"testing"
"github.com/databricks/cli/bundle/env"
"github.com/stretchr/testify/require"
"golang.org/x/exp/maps"
"github.com/databricks/cli/libs/dyn"
@ -20,19 +23,19 @@ import (
"github.com/databricks/cli/libs/process"
)
func TestApplyPythonMutator_Name_load(t *testing.T) {
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
func TestPythonMutator_Name_load(t *testing.T) {
mutator := PythonMutator(PythonMutatorPhaseLoad)
assert.Equal(t, "ApplyPythonMutator(load)", mutator.Name())
assert.Equal(t, "PythonMutator(load)", mutator.Name())
}
func TestApplyPythonMutator_Name_init(t *testing.T) {
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit)
func TestPythonMutator_Name_init(t *testing.T) {
mutator := PythonMutator(PythonMutatorPhaseInit)
assert.Equal(t, "ApplyPythonMutator(init)", mutator.Name())
assert.Equal(t, "PythonMutator(init)", mutator.Name())
}
func TestApplyPythonMutator_load(t *testing.T) {
func TestPythonMutator_load(t *testing.T) {
withFakeVEnv(t, ".venv")
b := loadYaml("databricks.yml", `
@ -46,6 +49,7 @@ func TestApplyPythonMutator_load(t *testing.T) {
name: job_0`)
ctx := withProcessStub(
t,
[]string{
interpreterPath(".venv"),
"-m",
@ -72,7 +76,7 @@ func TestApplyPythonMutator_load(t *testing.T) {
}
}`)
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.NoError(t, diag.Error())
@ -88,7 +92,7 @@ func TestApplyPythonMutator_load(t *testing.T) {
}
}
func TestApplyPythonMutator_load_disallowed(t *testing.T) {
func TestPythonMutator_load_disallowed(t *testing.T) {
withFakeVEnv(t, ".venv")
b := loadYaml("databricks.yml", `
@ -102,6 +106,7 @@ func TestApplyPythonMutator_load_disallowed(t *testing.T) {
name: job_0`)
ctx := withProcessStub(
t,
[]string{
interpreterPath(".venv"),
"-m",
@ -126,13 +131,13 @@ func TestApplyPythonMutator_load_disallowed(t *testing.T) {
}
}`)
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.EqualError(t, diag.Error(), "unexpected change at \"resources.jobs.job0.description\" (insert)")
}
func TestApplyPythonMutator_init(t *testing.T) {
func TestPythonMutator_init(t *testing.T) {
withFakeVEnv(t, ".venv")
b := loadYaml("databricks.yml", `
@ -146,6 +151,7 @@ func TestApplyPythonMutator_init(t *testing.T) {
name: job_0`)
ctx := withProcessStub(
t,
[]string{
interpreterPath(".venv"),
"-m",
@ -170,7 +176,7 @@ func TestApplyPythonMutator_init(t *testing.T) {
}
}`)
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit)
mutator := PythonMutator(PythonMutatorPhaseInit)
diag := bundle.Apply(ctx, b, mutator)
assert.NoError(t, diag.Error())
@ -178,9 +184,28 @@ func TestApplyPythonMutator_init(t *testing.T) {
assert.ElementsMatch(t, []string{"job0"}, maps.Keys(b.Config.Resources.Jobs))
assert.Equal(t, "job_0", b.Config.Resources.Jobs["job0"].Name)
assert.Equal(t, "my job", b.Config.Resources.Jobs["job0"].Description)
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
// 'name' wasn't changed, so it keeps its location
name, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.name"))
require.NoError(t, err)
assert.Equal(t, "databricks.yml", name.Location().File)
// 'description' was updated by PyDABs and has location of generated file until
// we implement source maps
description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description"))
require.NoError(t, err)
expectedVirtualPath, err := filepath.Abs("__generated_by_pydabs__.yml")
require.NoError(t, err)
assert.Equal(t, expectedVirtualPath, description.Location().File)
return v, nil
})
assert.NoError(t, err)
}
func TestApplyPythonMutator_badOutput(t *testing.T) {
func TestPythonMutator_badOutput(t *testing.T) {
withFakeVEnv(t, ".venv")
b := loadYaml("databricks.yml", `
@ -194,6 +219,7 @@ func TestApplyPythonMutator_badOutput(t *testing.T) {
name: job_0`)
ctx := withProcessStub(
t,
[]string{
interpreterPath(".venv"),
"-m",
@ -211,36 +237,36 @@ func TestApplyPythonMutator_badOutput(t *testing.T) {
}
}`)
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.EqualError(t, diag.Error(), "failed to normalize Python mutator output: unknown field: unknown_property")
}
func TestApplyPythonMutator_disabled(t *testing.T) {
func TestPythonMutator_disabled(t *testing.T) {
b := loadYaml("databricks.yml", ``)
ctx := context.Background()
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.NoError(t, diag.Error())
}
func TestApplyPythonMutator_venvRequired(t *testing.T) {
func TestPythonMutator_venvRequired(t *testing.T) {
b := loadYaml("databricks.yml", `
experimental:
pydabs:
enabled: true`)
ctx := context.Background()
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad)
mutator := PythonMutator(PythonMutatorPhaseLoad)
diag := bundle.Apply(ctx, b, mutator)
assert.Error(t, diag.Error(), "\"experimental.enable_pydabs\" is enabled, but \"experimental.venv.path\" is not set")
}
func TestApplyPythonMutator_venvNotFound(t *testing.T) {
func TestPythonMutator_venvNotFound(t *testing.T) {
expectedError := fmt.Sprintf("can't find %q, check if venv is created", interpreterPath("bad_path"))
b := loadYaml("databricks.yml", `
@ -249,7 +275,7 @@ func TestApplyPythonMutator_venvNotFound(t *testing.T) {
enabled: true
venv_path: bad_path`)
mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit)
mutator := PythonMutator(PythonMutatorPhaseInit)
diag := bundle.Apply(context.Background(), b, mutator)
assert.EqualError(t, diag.Error(), expectedError)
@ -273,7 +299,7 @@ func TestCreateOverrideVisitor(t *testing.T) {
testCases := []createOverrideVisitorTestCase{
{
name: "load: can't change an existing job",
phase: ApplyPythonMutatorPhaseLoad,
phase: PythonMutatorPhaseLoad,
updatePath: dyn.MustPathFromString("resources.jobs.job0.name"),
deletePath: dyn.MustPathFromString("resources.jobs.job0.name"),
insertPath: dyn.MustPathFromString("resources.jobs.job0.name"),
@ -283,19 +309,19 @@ func TestCreateOverrideVisitor(t *testing.T) {
},
{
name: "load: can't delete an existing job",
phase: ApplyPythonMutatorPhaseLoad,
phase: PythonMutatorPhaseLoad,
deletePath: dyn.MustPathFromString("resources.jobs.job0"),
deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"),
},
{
name: "load: can insert a job",
phase: ApplyPythonMutatorPhaseLoad,
phase: PythonMutatorPhaseLoad,
insertPath: dyn.MustPathFromString("resources.jobs.job0"),
insertError: nil,
},
{
name: "load: can't change include",
phase: ApplyPythonMutatorPhaseLoad,
phase: PythonMutatorPhaseLoad,
deletePath: dyn.MustPathFromString("include[0]"),
insertPath: dyn.MustPathFromString("include[0]"),
updatePath: dyn.MustPathFromString("include[0]"),
@ -305,7 +331,7 @@ func TestCreateOverrideVisitor(t *testing.T) {
},
{
name: "init: can change an existing job",
phase: ApplyPythonMutatorPhaseInit,
phase: PythonMutatorPhaseInit,
updatePath: dyn.MustPathFromString("resources.jobs.job0.name"),
deletePath: dyn.MustPathFromString("resources.jobs.job0.name"),
insertPath: dyn.MustPathFromString("resources.jobs.job0.name"),
@ -315,19 +341,19 @@ func TestCreateOverrideVisitor(t *testing.T) {
},
{
name: "init: can't delete an existing job",
phase: ApplyPythonMutatorPhaseInit,
phase: PythonMutatorPhaseInit,
deletePath: dyn.MustPathFromString("resources.jobs.job0"),
deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"),
},
{
name: "init: can insert a job",
phase: ApplyPythonMutatorPhaseInit,
phase: PythonMutatorPhaseInit,
insertPath: dyn.MustPathFromString("resources.jobs.job0"),
insertError: nil,
},
{
name: "init: can't change include",
phase: ApplyPythonMutatorPhaseInit,
phase: PythonMutatorPhaseInit,
deletePath: dyn.MustPathFromString("include[0]"),
insertPath: dyn.MustPathFromString("include[0]"),
updatePath: dyn.MustPathFromString("include[0]"),
@ -391,14 +417,28 @@ func TestInterpreterPath(t *testing.T) {
}
}
func withProcessStub(args []string, stdout string) context.Context {
func withProcessStub(t *testing.T, args []string, stdout string) context.Context {
ctx := context.Background()
ctx, stub := process.WithStub(ctx)
stub.WithCallback(func(actual *exec.Cmd) error {
if reflect.DeepEqual(actual.Args, args) {
_, err := actual.Stdout.Write([]byte(stdout))
t.Setenv(env.TempDirVariable, t.TempDir())
// after we override env variable, we always get the same cache dir as mutator
cacheDir, err := createCacheDir(ctx)
require.NoError(t, err)
inputPath := filepath.Join(cacheDir, "input.json")
outputPath := filepath.Join(cacheDir, "output.json")
args = append(args, "--input", inputPath)
args = append(args, "--output", outputPath)
stub.WithCallback(func(actual *exec.Cmd) error {
_, err := os.Stat(inputPath)
assert.NoError(t, err)
if reflect.DeepEqual(actual.Args, args) {
err := os.WriteFile(outputPath, []byte(stdout), 0600)
return err
} else {
return fmt.Errorf("unexpected command: %v", actual.Args)

View File

@ -30,8 +30,8 @@ func Initialize() bundle.Mutator {
mutator.DefineDefaultWorkspacePaths(),
mutator.SetVariables(),
// Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences
// and ResolveVariableReferences. See what is expected in ApplyPythonMutatorPhaseInit doc
pythonmutator.ApplyPythonMutator(pythonmutator.ApplyPythonMutatorPhaseInit),
// and ResolveVariableReferences. See what is expected in PythonMutatorPhaseInit doc
pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit),
mutator.ResolveVariableReferencesInLookup(),
mutator.ResolveResourceReferences(),
mutator.ResolveVariableReferences(