From 5ff06578ac9cec91fc3a0c4e34c566c9481296dc Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Mon, 24 Jun 2024 09:47:41 +0200 Subject: [PATCH] PythonMutator: replace stdin/stdout with files (#1512) ## Changes Replace stdin/stdout with files in `PythonMutator`. Files are created in a temporary directory. Rename `ApplyPythonMutator` to `PythonMutator`. Add test for `dyn.Location` behavior during the "load" stage. ## Tests Unit tests --- bundle/config/mutator/mutator.go | 2 +- ...ly_python_mutator.go => python_mutator.go} | 97 ++++++++++++---- ...mutator_test.go => python_mutator_test.go} | 104 ++++++++++++------ bundle/phases/initialize.go | 4 +- 4 files changed, 150 insertions(+), 57 deletions(-) rename bundle/config/mutator/python/{apply_python_mutator.go => python_mutator.go} (73%) rename bundle/config/mutator/python/{apply_python_mutator_test.go => python_mutator_test.go} (77%) diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index d6bfcb77..52f85eeb 100644 --- a/bundle/config/mutator/mutator.go +++ b/bundle/config/mutator/mutator.go @@ -25,6 +25,6 @@ func DefaultMutators() []bundle.Mutator { InitializeVariables(), DefineDefaultTarget(), LoadGitDetails(), - pythonmutator.ApplyPythonMutator(pythonmutator.ApplyPythonMutatorPhaseLoad), + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseLoad), } } diff --git a/bundle/config/mutator/python/apply_python_mutator.go b/bundle/config/mutator/python/python_mutator.go similarity index 73% rename from bundle/config/mutator/python/apply_python_mutator.go rename to bundle/config/mutator/python/python_mutator.go index 298ffb57..73ddf952 100644 --- a/bundle/config/mutator/python/apply_python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -1,7 +1,6 @@ package python import ( - "bytes" "context" "encoding/json" "fmt" @@ -9,6 +8,8 @@ import ( "path/filepath" "runtime" + "github.com/databricks/cli/bundle/env" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/libs/diag" @@ -23,7 +24,7 @@ import ( type phase string const ( - // ApplyPythonMutatorPhaseLoad is the phase in which bundle configuration is loaded. + // PythonMutatorPhaseLoad is the phase in which bundle configuration is loaded. // // At this stage, PyDABs adds statically defined resources to the bundle configuration. // Which resources are added should be deterministic and not depend on the bundle configuration. @@ -31,9 +32,9 @@ const ( // We also open for possibility of appending other sections of bundle configuration, // for example, adding new variables. However, this is not supported yet, and CLI rejects // such changes. - ApplyPythonMutatorPhaseLoad phase = "load" + PythonMutatorPhaseLoad phase = "load" - // ApplyPythonMutatorPhaseInit is the phase after bundle configuration was loaded, and + // PythonMutatorPhaseInit is the phase after bundle configuration was loaded, and // the list of statically declared resources is known. // // At this stage, PyDABs adds resources defined using generators, or mutates existing resources, @@ -50,21 +51,21 @@ const ( // PyDABs can output YAML containing references to variables, and CLI should resolve them. // // Existing resources can't be removed, and CLI rejects such changes. - ApplyPythonMutatorPhaseInit phase = "init" + PythonMutatorPhaseInit phase = "init" ) -type applyPythonMutator struct { +type pythonMutator struct { phase phase } -func ApplyPythonMutator(phase phase) bundle.Mutator { - return &applyPythonMutator{ +func PythonMutator(phase phase) bundle.Mutator { + return &pythonMutator{ phase: phase, } } -func (m *applyPythonMutator) Name() string { - return fmt.Sprintf("ApplyPythonMutator(%s)", m.phase) +func (m *pythonMutator) Name() string { + return fmt.Sprintf("PythonMutator(%s)", m.phase) } func getExperimental(b *bundle.Bundle) config.Experimental { @@ -75,7 +76,7 @@ func getExperimental(b *bundle.Bundle) config.Experimental { return *b.Config.Experimental } -func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { experimental := getExperimental(b) if !experimental.PyDABs.Enabled { @@ -97,7 +98,12 @@ func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D } } - rightRoot, err := m.runPythonMutator(ctx, b.RootPath, pythonPath, leftRoot) + cacheDir, err := createCacheDir(ctx) + if err != nil { + return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) + } + + rightRoot, err := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) if err != nil { return dyn.InvalidValue, err } @@ -113,13 +119,39 @@ func (m *applyPythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D return diag.FromErr(err) } -func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) { +func createCacheDir(ctx context.Context) (string, error) { + // b.CacheDir doesn't work because target isn't yet selected + + // support the same env variable as in b.CacheDir + if tempDir, exists := env.TempDir(ctx); exists { + // use 'default' as target name + cacheDir := filepath.Join(tempDir, "default", "pydabs") + + err := os.MkdirAll(cacheDir, 0700) + if err != nil { + return "", err + } + + return cacheDir, nil + } + + return os.MkdirTemp("", "-pydabs") +} + +func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, error) { + inputPath := filepath.Join(cacheDir, "input.json") + outputPath := filepath.Join(cacheDir, "output.json") + args := []string{ pythonPath, "-m", "databricks.bundles.build", "--phase", string(m.phase), + "--input", + inputPath, + "--output", + outputPath, } // we need to marshal dyn.Value instead of bundle.Config to JSON to support @@ -129,27 +161,48 @@ func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath stri return dyn.InvalidValue, fmt.Errorf("failed to marshal root config: %w", err) } - logWriter := newLogWriter(ctx, "stderr: ") + err = os.WriteFile(inputPath, rootConfigJson, 0600) + if err != nil { + return dyn.InvalidValue, fmt.Errorf("failed to write input file: %w", err) + } - stdout, err := process.Background( + stderrWriter := newLogWriter(ctx, "stderr: ") + stdoutWriter := newLogWriter(ctx, "stdout: ") + + _, err = process.Background( ctx, args, process.WithDir(rootPath), - process.WithStderrWriter(logWriter), - process.WithStdinReader(bytes.NewBuffer(rootConfigJson)), + process.WithStderrWriter(stderrWriter), + process.WithStdoutWriter(stdoutWriter), ) if err != nil { return dyn.InvalidValue, fmt.Errorf("python mutator process failed: %w", err) } - // we need absolute path, or because later parts of pipeline assume all paths are absolute - // and this file will be used as location + outputFile, err := os.Open(outputPath) + if err != nil { + return dyn.InvalidValue, fmt.Errorf("failed to open Python mutator output: %w", err) + } + + defer func() { + _ = outputFile.Close() + }() + + // we need absolute path because later parts of pipeline assume all paths are absolute + // and this file will be used as location to resolve relative paths. + // + // virtualPath has to stay in rootPath, because locations outside root path are not allowed: + // + // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path + // + // for that, we pass virtualPath instead of outputPath as file location virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) if err != nil { return dyn.InvalidValue, fmt.Errorf("failed to get absolute path: %w", err) } - generated, err := yamlloader.LoadYAML(virtualPath, bytes.NewReader([]byte(stdout))) + generated, err := yamlloader.LoadYAML(virtualPath, outputFile) if err != nil { return dyn.InvalidValue, fmt.Errorf("failed to parse Python mutator output: %w", err) } @@ -171,9 +224,9 @@ func (m *applyPythonMutator) runPythonMutator(ctx context.Context, rootPath stri func createOverrideVisitor(ctx context.Context, phase phase) (merge.OverrideVisitor, error) { switch phase { - case ApplyPythonMutatorPhaseLoad: + case PythonMutatorPhaseLoad: return createLoadOverrideVisitor(ctx), nil - case ApplyPythonMutatorPhaseInit: + case PythonMutatorPhaseInit: return createInitOverrideVisitor(ctx), nil default: return merge.OverrideVisitor{}, fmt.Errorf("unknown phase: %s", phase) diff --git a/bundle/config/mutator/python/apply_python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go similarity index 77% rename from bundle/config/mutator/python/apply_python_mutator_test.go rename to bundle/config/mutator/python/python_mutator_test.go index 8759ab80..e2c20386 100644 --- a/bundle/config/mutator/python/apply_python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -10,6 +10,9 @@ import ( "runtime" "testing" + "github.com/databricks/cli/bundle/env" + "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" "github.com/databricks/cli/libs/dyn" @@ -20,19 +23,19 @@ import ( "github.com/databricks/cli/libs/process" ) -func TestApplyPythonMutator_Name_load(t *testing.T) { - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) +func TestPythonMutator_Name_load(t *testing.T) { + mutator := PythonMutator(PythonMutatorPhaseLoad) - assert.Equal(t, "ApplyPythonMutator(load)", mutator.Name()) + assert.Equal(t, "PythonMutator(load)", mutator.Name()) } -func TestApplyPythonMutator_Name_init(t *testing.T) { - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit) +func TestPythonMutator_Name_init(t *testing.T) { + mutator := PythonMutator(PythonMutatorPhaseInit) - assert.Equal(t, "ApplyPythonMutator(init)", mutator.Name()) + assert.Equal(t, "PythonMutator(init)", mutator.Name()) } -func TestApplyPythonMutator_load(t *testing.T) { +func TestPythonMutator_load(t *testing.T) { withFakeVEnv(t, ".venv") b := loadYaml("databricks.yml", ` @@ -46,6 +49,7 @@ func TestApplyPythonMutator_load(t *testing.T) { name: job_0`) ctx := withProcessStub( + t, []string{ interpreterPath(".venv"), "-m", @@ -72,7 +76,7 @@ func TestApplyPythonMutator_load(t *testing.T) { } }`) - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) assert.NoError(t, diag.Error()) @@ -88,7 +92,7 @@ func TestApplyPythonMutator_load(t *testing.T) { } } -func TestApplyPythonMutator_load_disallowed(t *testing.T) { +func TestPythonMutator_load_disallowed(t *testing.T) { withFakeVEnv(t, ".venv") b := loadYaml("databricks.yml", ` @@ -102,6 +106,7 @@ func TestApplyPythonMutator_load_disallowed(t *testing.T) { name: job_0`) ctx := withProcessStub( + t, []string{ interpreterPath(".venv"), "-m", @@ -126,13 +131,13 @@ func TestApplyPythonMutator_load_disallowed(t *testing.T) { } }`) - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) assert.EqualError(t, diag.Error(), "unexpected change at \"resources.jobs.job0.description\" (insert)") } -func TestApplyPythonMutator_init(t *testing.T) { +func TestPythonMutator_init(t *testing.T) { withFakeVEnv(t, ".venv") b := loadYaml("databricks.yml", ` @@ -146,6 +151,7 @@ func TestApplyPythonMutator_init(t *testing.T) { name: job_0`) ctx := withProcessStub( + t, []string{ interpreterPath(".venv"), "-m", @@ -170,7 +176,7 @@ func TestApplyPythonMutator_init(t *testing.T) { } }`) - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit) + mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(ctx, b, mutator) assert.NoError(t, diag.Error()) @@ -178,9 +184,28 @@ func TestApplyPythonMutator_init(t *testing.T) { assert.ElementsMatch(t, []string{"job0"}, maps.Keys(b.Config.Resources.Jobs)) assert.Equal(t, "job_0", b.Config.Resources.Jobs["job0"].Name) assert.Equal(t, "my job", b.Config.Resources.Jobs["job0"].Description) + + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + // 'name' wasn't changed, so it keeps its location + name, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.name")) + require.NoError(t, err) + assert.Equal(t, "databricks.yml", name.Location().File) + + // 'description' was updated by PyDABs and has location of generated file until + // we implement source maps + description, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job0.description")) + require.NoError(t, err) + + expectedVirtualPath, err := filepath.Abs("__generated_by_pydabs__.yml") + require.NoError(t, err) + assert.Equal(t, expectedVirtualPath, description.Location().File) + + return v, nil + }) + assert.NoError(t, err) } -func TestApplyPythonMutator_badOutput(t *testing.T) { +func TestPythonMutator_badOutput(t *testing.T) { withFakeVEnv(t, ".venv") b := loadYaml("databricks.yml", ` @@ -194,6 +219,7 @@ func TestApplyPythonMutator_badOutput(t *testing.T) { name: job_0`) ctx := withProcessStub( + t, []string{ interpreterPath(".venv"), "-m", @@ -211,36 +237,36 @@ func TestApplyPythonMutator_badOutput(t *testing.T) { } }`) - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) assert.EqualError(t, diag.Error(), "failed to normalize Python mutator output: unknown field: unknown_property") } -func TestApplyPythonMutator_disabled(t *testing.T) { +func TestPythonMutator_disabled(t *testing.T) { b := loadYaml("databricks.yml", ``) ctx := context.Background() - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) assert.NoError(t, diag.Error()) } -func TestApplyPythonMutator_venvRequired(t *testing.T) { +func TestPythonMutator_venvRequired(t *testing.T) { b := loadYaml("databricks.yml", ` experimental: pydabs: enabled: true`) ctx := context.Background() - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseLoad) + mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) assert.Error(t, diag.Error(), "\"experimental.enable_pydabs\" is enabled, but \"experimental.venv.path\" is not set") } -func TestApplyPythonMutator_venvNotFound(t *testing.T) { +func TestPythonMutator_venvNotFound(t *testing.T) { expectedError := fmt.Sprintf("can't find %q, check if venv is created", interpreterPath("bad_path")) b := loadYaml("databricks.yml", ` @@ -249,7 +275,7 @@ func TestApplyPythonMutator_venvNotFound(t *testing.T) { enabled: true venv_path: bad_path`) - mutator := ApplyPythonMutator(ApplyPythonMutatorPhaseInit) + mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(context.Background(), b, mutator) assert.EqualError(t, diag.Error(), expectedError) @@ -273,7 +299,7 @@ func TestCreateOverrideVisitor(t *testing.T) { testCases := []createOverrideVisitorTestCase{ { name: "load: can't change an existing job", - phase: ApplyPythonMutatorPhaseLoad, + phase: PythonMutatorPhaseLoad, updatePath: dyn.MustPathFromString("resources.jobs.job0.name"), deletePath: dyn.MustPathFromString("resources.jobs.job0.name"), insertPath: dyn.MustPathFromString("resources.jobs.job0.name"), @@ -283,19 +309,19 @@ func TestCreateOverrideVisitor(t *testing.T) { }, { name: "load: can't delete an existing job", - phase: ApplyPythonMutatorPhaseLoad, + phase: PythonMutatorPhaseLoad, deletePath: dyn.MustPathFromString("resources.jobs.job0"), deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"), }, { name: "load: can insert a job", - phase: ApplyPythonMutatorPhaseLoad, + phase: PythonMutatorPhaseLoad, insertPath: dyn.MustPathFromString("resources.jobs.job0"), insertError: nil, }, { name: "load: can't change include", - phase: ApplyPythonMutatorPhaseLoad, + phase: PythonMutatorPhaseLoad, deletePath: dyn.MustPathFromString("include[0]"), insertPath: dyn.MustPathFromString("include[0]"), updatePath: dyn.MustPathFromString("include[0]"), @@ -305,7 +331,7 @@ func TestCreateOverrideVisitor(t *testing.T) { }, { name: "init: can change an existing job", - phase: ApplyPythonMutatorPhaseInit, + phase: PythonMutatorPhaseInit, updatePath: dyn.MustPathFromString("resources.jobs.job0.name"), deletePath: dyn.MustPathFromString("resources.jobs.job0.name"), insertPath: dyn.MustPathFromString("resources.jobs.job0.name"), @@ -315,19 +341,19 @@ func TestCreateOverrideVisitor(t *testing.T) { }, { name: "init: can't delete an existing job", - phase: ApplyPythonMutatorPhaseInit, + phase: PythonMutatorPhaseInit, deletePath: dyn.MustPathFromString("resources.jobs.job0"), deleteError: fmt.Errorf("unexpected change at \"resources.jobs.job0\" (delete)"), }, { name: "init: can insert a job", - phase: ApplyPythonMutatorPhaseInit, + phase: PythonMutatorPhaseInit, insertPath: dyn.MustPathFromString("resources.jobs.job0"), insertError: nil, }, { name: "init: can't change include", - phase: ApplyPythonMutatorPhaseInit, + phase: PythonMutatorPhaseInit, deletePath: dyn.MustPathFromString("include[0]"), insertPath: dyn.MustPathFromString("include[0]"), updatePath: dyn.MustPathFromString("include[0]"), @@ -391,14 +417,28 @@ func TestInterpreterPath(t *testing.T) { } } -func withProcessStub(args []string, stdout string) context.Context { +func withProcessStub(t *testing.T, args []string, stdout string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) - stub.WithCallback(func(actual *exec.Cmd) error { - if reflect.DeepEqual(actual.Args, args) { - _, err := actual.Stdout.Write([]byte(stdout)) + t.Setenv(env.TempDirVariable, t.TempDir()) + // after we override env variable, we always get the same cache dir as mutator + cacheDir, err := createCacheDir(ctx) + require.NoError(t, err) + + inputPath := filepath.Join(cacheDir, "input.json") + outputPath := filepath.Join(cacheDir, "output.json") + + args = append(args, "--input", inputPath) + args = append(args, "--output", outputPath) + + stub.WithCallback(func(actual *exec.Cmd) error { + _, err := os.Stat(inputPath) + assert.NoError(t, err) + + if reflect.DeepEqual(actual.Args, args) { + err := os.WriteFile(outputPath, []byte(stdout), 0600) return err } else { return fmt.Errorf("unexpected command: %v", actual.Args) diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index d96c8d3b..d96ee0eb 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -30,8 +30,8 @@ func Initialize() bundle.Mutator { mutator.DefineDefaultWorkspacePaths(), mutator.SetVariables(), // Intentionally placed before ResolveVariableReferencesInLookup, ResolveResourceReferences - // and ResolveVariableReferences. See what is expected in ApplyPythonMutatorPhaseInit doc - pythonmutator.ApplyPythonMutator(pythonmutator.ApplyPythonMutatorPhaseInit), + // and ResolveVariableReferences. See what is expected in PythonMutatorPhaseInit doc + pythonmutator.PythonMutator(pythonmutator.PythonMutatorPhaseInit), mutator.ResolveVariableReferencesInLookup(), mutator.ResolveResourceReferences(), mutator.ResolveVariableReferences(