2022-12-16 13:49:23 +00:00
|
|
|
package mutator_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2024-11-21 10:51:21 +00:00
|
|
|
"fmt"
|
2022-12-16 13:49:23 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2024-11-20 12:22:27 +00:00
|
|
|
"runtime"
|
2022-12-16 13:49:23 +00:00
|
|
|
"testing"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/bundle"
|
|
|
|
"github.com/databricks/cli/bundle/config"
|
|
|
|
"github.com/databricks/cli/bundle/config/mutator"
|
|
|
|
"github.com/databricks/cli/bundle/config/resources"
|
2024-07-09 11:12:42 +00:00
|
|
|
"github.com/databricks/cli/bundle/config/variable"
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is a variant type) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* The functionality to merge job clusters, job tasks, and pipeline clusters
is now broken out into separate mutators, one for each.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
"github.com/databricks/cli/bundle/internal/bundletest"
|
2024-07-09 11:12:42 +00:00
|
|
|
"github.com/databricks/cli/libs/diag"
|
|
|
|
"github.com/databricks/cli/libs/dyn"
|
2024-07-03 10:13:22 +00:00
|
|
|
"github.com/databricks/cli/libs/vfs"
|
2023-09-04 09:55:01 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
2022-12-16 13:49:23 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
|
|
|
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
)
|
|
|
|
|
2023-04-05 14:02:17 +00:00
|
|
|
func touchNotebookFile(t *testing.T, path string) {
|
2022-12-16 13:49:23 +00:00
|
|
|
f, err := os.Create(path)
|
|
|
|
require.NoError(t, err)
|
2023-02-20 18:42:55 +00:00
|
|
|
_, err = f.WriteString("# Databricks notebook source\n")
|
2022-12-16 13:49:23 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
f.Close()
|
|
|
|
}
|
|
|
|
|
2023-04-05 14:02:17 +00:00
|
|
|
func touchEmptyFile(t *testing.T, path string) {
|
2023-04-12 14:17:13 +00:00
|
|
|
err := os.MkdirAll(filepath.Dir(path), 0o700)
|
|
|
|
require.NoError(t, err)
|
2023-04-05 14:02:17 +00:00
|
|
|
f, err := os.Create(path)
|
|
|
|
require.NoError(t, err)
|
|
|
|
f.Close()
|
|
|
|
}
|
|
|
|
|
2023-06-07 10:34:59 +00:00
|
|
|
func TestTranslatePathsSkippedWithGitSource(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-06-07 10:34:59 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-06-07 10:34:59 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
GitSource: &jobs.GitSource{
|
|
|
|
GitBranch: "somebranch",
|
|
|
|
GitCommit: "somecommit",
|
|
|
|
GitProvider: "github",
|
|
|
|
GitTag: "sometag",
|
|
|
|
GitUrl: "https://github.com/someuser/somerepo",
|
|
|
|
},
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2023-06-07 10:34:59 +00:00
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "my_job_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
PythonWheelTask: &jobs.PythonWheelTask{
|
|
|
|
PackageName: "foo",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
require.NoError(t, diags.Error())
|
2023-06-07 10:34:59 +00:00
|
|
|
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"my_job_notebook.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath,
|
2023-06-07 10:34:59 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"foo",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[1].PythonWheelTask.PackageName,
|
2023-06-07 10:34:59 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"my_python_file.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[2].SparkPythonTask.PythonFile,
|
2023-06-07 10:34:59 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-04-05 14:02:17 +00:00
|
|
|
func TestTranslatePaths(t *testing.T) {
|
2022-12-16 13:49:23 +00:00
|
|
|
dir := t.TempDir()
|
2023-04-05 14:02:17 +00:00
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_job_notebook.py"))
|
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "my_python_file.py"))
|
2023-09-04 09:55:01 +00:00
|
|
|
touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar"))
|
2024-08-21 10:03:56 +00:00
|
|
|
touchEmptyFile(t, filepath.Join(dir, "requirements.txt"))
|
2022-12-16 13:49:23 +00:00
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2022-12-16 13:49:23 +00:00
|
|
|
Config: config.Root{
|
2023-04-05 14:02:17 +00:00
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-04-05 14:02:17 +00:00
|
|
|
},
|
2022-12-16 13:49:23 +00:00
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2022-12-16 13:49:23 +00:00
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "./my_job_notebook.py",
|
|
|
|
},
|
2023-09-04 09:55:01 +00:00
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Whl: "./dist/task.whl"},
|
|
|
|
},
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
2023-03-21 17:13:16 +00:00
|
|
|
NotebookPath: "/Users/jane.doe@databricks.com/doesnt_exist.py",
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "./my_job_notebook.py",
|
|
|
|
},
|
2024-08-21 10:03:56 +00:00
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Requirements: "./requirements.txt"},
|
|
|
|
},
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
{
|
|
|
|
PythonWheelTask: &jobs.PythonWheelTask{
|
|
|
|
PackageName: "foo",
|
|
|
|
},
|
|
|
|
},
|
2023-04-05 14:02:17 +00:00
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "./my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
2023-09-04 09:55:01 +00:00
|
|
|
{
|
|
|
|
SparkJarTask: &jobs.SparkJarTask{
|
|
|
|
MainClassName: "HelloWorld",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Jar: "./dist/task.jar"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
SparkJarTask: &jobs.SparkJarTask{
|
|
|
|
MainClassName: "HelloWorldRemote",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
2023-09-08 09:53:57 +00:00
|
|
|
{Jar: "dbfs:/bundle/dist/task_remote.jar"},
|
2023-09-04 09:55:01 +00:00
|
|
|
},
|
|
|
|
},
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2022-12-16 13:49:23 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "./my_pipeline_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
2023-03-21 17:13:16 +00:00
|
|
|
Path: "/Users/jane.doe@databricks.com/doesnt_exist.py",
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "./my_pipeline_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Jar: "foo",
|
|
|
|
},
|
2023-04-05 14:29:42 +00:00
|
|
|
{
|
|
|
|
File: &pipelines.FileLibrary{
|
|
|
|
Path: "./my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
2022-12-16 13:49:23 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
require.NoError(t, diags.Error())
|
2022-12-16 13:49:23 +00:00
|
|
|
|
|
|
|
// Assert that the path in the tasks now refer to the artifact.
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-04-05 14:02:17 +00:00
|
|
|
"/bundle/my_job_notebook",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
2023-09-04 09:55:01 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"dist/task.whl",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl,
|
2023-09-04 09:55:01 +00:00
|
|
|
)
|
2022-12-16 13:49:23 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-03-21 17:13:16 +00:00
|
|
|
"/Users/jane.doe@databricks.com/doesnt_exist.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-04-05 14:02:17 +00:00
|
|
|
"/bundle/my_job_notebook",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[2].NotebookTask.NotebookPath,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
2024-08-21 10:03:56 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/requirements.txt",
|
|
|
|
b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements,
|
|
|
|
)
|
2023-04-05 14:02:17 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/my_python_file.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[4].SparkPythonTask.PythonFile,
|
2023-04-05 14:02:17 +00:00
|
|
|
)
|
2023-09-04 09:55:01 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"dist/task.jar",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar,
|
2023-09-04 09:55:01 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-09-08 09:53:57 +00:00
|
|
|
"dbfs:/bundle/dist/task_remote.jar",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[6].Libraries[0].Jar,
|
2023-09-04 09:55:01 +00:00
|
|
|
)
|
2022-12-16 13:49:23 +00:00
|
|
|
|
|
|
|
// Assert that the path in the libraries now refer to the artifact.
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-04-05 14:02:17 +00:00
|
|
|
"/bundle/my_pipeline_notebook",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-03-21 17:13:16 +00:00
|
|
|
"/Users/jane.doe@databricks.com/doesnt_exist.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2023-04-05 14:02:17 +00:00
|
|
|
"/bundle/my_pipeline_notebook",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[2].Notebook.Path,
|
2022-12-16 13:49:23 +00:00
|
|
|
)
|
2023-04-05 14:29:42 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/my_python_file.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[4].File.Path,
|
2023-04-05 14:29:42 +00:00
|
|
|
)
|
2022-12-16 13:49:23 +00:00
|
|
|
}
|
2023-03-21 17:13:16 +00:00
|
|
|
|
2023-04-12 14:17:13 +00:00
|
|
|
func TestTranslatePathsInSubdirectories(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "job", "my_python_file.py"))
|
2023-09-04 09:55:01 +00:00
|
|
|
touchEmptyFile(t, filepath.Join(dir, "job", "dist", "task.jar"))
|
2023-04-12 14:17:13 +00:00
|
|
|
touchEmptyFile(t, filepath.Join(dir, "pipeline", "my_python_file.py"))
|
2023-11-07 20:00:09 +00:00
|
|
|
touchEmptyFile(t, filepath.Join(dir, "job", "my_sql_file.sql"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "job", "my_dbt_project", "dbt_project.yml"))
|
2023-04-12 14:17:13 +00:00
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-04-12 14:17:13 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-04-12 14:17:13 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2023-04-12 14:17:13 +00:00
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "./my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
2023-09-04 09:55:01 +00:00
|
|
|
{
|
|
|
|
SparkJarTask: &jobs.SparkJarTask{
|
|
|
|
MainClassName: "HelloWorld",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Jar: "./dist/task.jar"},
|
|
|
|
},
|
|
|
|
},
|
2023-11-07 20:00:09 +00:00
|
|
|
{
|
|
|
|
SqlTask: &jobs.SqlTask{
|
|
|
|
File: &jobs.SqlTaskFile{
|
|
|
|
Path: "./my_sql_file.sql",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
DbtTask: &jobs.DbtTask{
|
|
|
|
ProjectDirectory: "./my_dbt_project",
|
|
|
|
},
|
|
|
|
},
|
2023-04-12 14:17:13 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2023-04-12 14:17:13 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
File: &pipelines.FileLibrary{
|
|
|
|
Path: "./my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}})
|
|
|
|
bundletest.SetLocation(b, "resources.pipelines", []dyn.Location{{File: filepath.Join(dir, "pipeline/resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
require.NoError(t, diags.Error())
|
2023-04-12 14:17:13 +00:00
|
|
|
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/job/my_python_file.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].SparkPythonTask.PythonFile,
|
2023-04-12 14:17:13 +00:00
|
|
|
)
|
2023-09-04 09:55:01 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"job/dist/task.jar",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[1].Libraries[0].Jar,
|
2023-09-04 09:55:01 +00:00
|
|
|
)
|
2023-11-07 20:00:09 +00:00
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/job/my_sql_file.sql",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[2].SqlTask.File.Path,
|
2023-11-07 20:00:09 +00:00
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/job/my_dbt_project",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[3].DbtTask.ProjectDirectory,
|
2023-11-07 20:00:09 +00:00
|
|
|
)
|
2023-04-12 14:17:13 +00:00
|
|
|
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/bundle/pipeline/my_python_file.py",
|
2023-11-15 14:03:36 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[0].File.Path,
|
2023-04-12 14:17:13 +00:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2024-08-21 15:33:25 +00:00
|
|
|
func TestTranslatePathsOutsideSyncRoot(t *testing.T) {
|
2023-04-12 14:17:13 +00:00
|
|
|
dir := t.TempDir()
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-04-12 14:17:13 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-04-12 14:17:13 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2023-04-12 14:17:13 +00:00
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "./my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "../resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
2024-08-21 15:33:25 +00:00
|
|
|
assert.ErrorContains(t, diags.Error(), "is not contained in sync root path")
|
2023-04-12 14:17:13 +00:00
|
|
|
}
|
|
|
|
|
2023-03-21 17:13:16 +00:00
|
|
|
func TestJobNotebookDoesNotExistError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-03-21 17:13:16 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2023-03-21 17:13:16 +00:00
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "./doesnt_exist.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found")
|
2023-04-05 14:02:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestJobFileDoesNotExistError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-04-05 14:02:17 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
2023-07-03 09:46:45 +00:00
|
|
|
Tasks: []jobs.Task{
|
2023-04-05 14:02:17 +00:00
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "./doesnt_exist.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found")
|
2023-03-21 17:13:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestPipelineNotebookDoesNotExistError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-03-21 17:13:16 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2023-03-21 17:13:16 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "./doesnt_exist.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found")
|
2023-03-21 17:13:16 +00:00
|
|
|
}
|
2023-04-12 14:17:13 +00:00
|
|
|
|
2024-11-21 10:51:21 +00:00
|
|
|
func TestPipelineNotebookDoesNotExistErrorWithoutExtension(t *testing.T) {
|
|
|
|
for _, ext := range []string{
|
|
|
|
".py",
|
|
|
|
".r",
|
|
|
|
".scala",
|
|
|
|
".sql",
|
|
|
|
".ipynb",
|
|
|
|
"",
|
|
|
|
} {
|
|
|
|
t.Run("case_"+ext, func(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
|
|
|
|
if ext != "" {
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "foo"+ext))
|
|
|
|
}
|
|
|
|
|
|
|
|
b := &bundle.Bundle{
|
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2024-11-21 10:51:21 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "./foo",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}})
|
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
|
|
|
|
if ext == "" {
|
|
|
|
assert.EqualError(t, diags.Error(), `notebook ./foo not found. Local notebook references are expected
|
|
|
|
to contain one of the following file extensions: [.py, .r, .scala, .sql, .ipynb]`)
|
|
|
|
} else {
|
|
|
|
assert.EqualError(t, diags.Error(), fmt.Sprintf(`notebook ./foo not found. Did you mean ./foo%s?
|
|
|
|
Local notebook references are expected to contain one of the following
|
|
|
|
file extensions: [.py, .r, .scala, .sql, .ipynb]`, ext))
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-12 14:17:13 +00:00
|
|
|
func TestPipelineFileDoesNotExistError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-04-12 14:17:13 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2023-04-12 14:17:13 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
File: &pipelines.FileLibrary{
|
|
|
|
Path: "./doesnt_exist.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found")
|
2023-04-12 14:17:13 +00:00
|
|
|
}
|
2023-07-12 12:25:00 +00:00
|
|
|
|
|
|
|
func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-07-12 12:25:00 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-07-12 12:25:00 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
Tasks: []jobs.Task{
|
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "./my_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.ErrorContains(t, diags.Error(), `expected a file for "resources.jobs.job.tasks[0].spark_python_task.python_file" but got a notebook`)
|
2023-07-12 12:25:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestJobNotebookTaskWithFileSourceError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-07-12 12:25:00 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-07-12 12:25:00 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
Tasks: []jobs.Task{
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "./my_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.jobs.job.tasks[0].notebook_task.notebook_path" but got a file`)
|
2023-07-12 12:25:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-07-12 12:25:00 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-07-12 12:25:00 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2023-07-12 12:25:00 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "./my_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.pipelines.pipeline.libraries[0].notebook.path" but got a file`)
|
2023-07-12 12:25:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
|
|
|
|
|
2023-11-15 14:03:36 +00:00
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2023-07-12 12:25:00 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
2023-11-15 13:37:26 +00:00
|
|
|
FilePath: "/bundle",
|
2023-07-12 12:25:00 +00:00
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2023-07-12 12:25:00 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
File: &pipelines.FileLibrary{
|
|
|
|
Path: "./my_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
Use dynamic configuration model in bundles (#1098)
## Changes
This is a fundamental change to how we load and process bundle
configuration. We now depend on the configuration being represented as a
`dyn.Value`. This representation is functionally equivalent to Go's
`any` (it is variadic) and allows us to capture metadata associated with
a value, such as where it was defined (e.g. file, line, and column). It
also allows us to represent Go's zero values properly (e.g. empty
string, integer equal to 0, or boolean false).
Using this representation allows us to let the configuration model
deviate from the typed structure we have been relying on so far
(`config.Root`). We need to deviate from these types when using
variables for fields that are not a string themselves. For example,
using `${var.num_workers}` for an integer `workers` field was impossible
until now (though not implemented in this change).
The loader for a `dyn.Value` includes functionality to capture any and
all type mismatches between the user-defined configuration and the
expected types. These mismatches can be surfaced as validation errors in
future PRs.
Given that many mutators expect the typed struct to be the source of
truth, this change converts between the dynamic representation and the
typed representation on mutator entry and exit. Existing mutators can
continue to modify the typed representation and these modifications are
reflected in the dynamic representation (see `MarkMutatorEntry` and
`MarkMutatorExit` in `bundle/config/root.go`).
Required changes included in this change:
* The existing interpolation package is removed in favor of
`libs/dyn/dynvar`.
* Functionality to merge job clusters, job tasks, and pipeline clusters
are now all broken out into their own mutators.
To be implemented later:
* Allow variable references for non-string types.
* Surface diagnostics about the configuration provided by the user in
the validation output.
* Some mutators use a resource's configuration file path to resolve
related relative paths. These depend on `bundle/config/paths.Path` being
set and populated through `ConfigureConfigFilePath`. Instead, they
should interact with the dynamically typed configuration directly. Doing
this also unlocks being able to differentiate different base paths used
within a job (e.g. a task override with a relative path defined in a
directory other than the base job).
## Tests
* Existing unit tests pass (some have been modified to accommodate)
* Integration tests pass
2024-02-16 19:41:58 +00:00
|
|
|
|
2024-03-25 14:18:47 +00:00
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
assert.ErrorContains(t, diags.Error(), `expected a file for "resources.pipelines.pipeline.libraries[0].file.path" but got a notebook`)
|
2023-07-12 12:25:00 +00:00
|
|
|
}
|
2024-04-22 11:44:34 +00:00
|
|
|
|
|
|
|
func TestTranslatePathJobEnvironments(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "env1.py"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "env2.py"))
|
|
|
|
|
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2024-04-22 11:44:34 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
Environments: []jobs.JobEnvironment{
|
|
|
|
{
|
|
|
|
Spec: &compute.Environment{
|
|
|
|
Dependencies: []string{
|
|
|
|
"./dist/env1.whl",
|
|
|
|
"../dist/env2.whl",
|
|
|
|
"simplejson",
|
|
|
|
"/Workspace/Users/foo@bar.com/test.whl",
|
2024-10-21 11:45:39 +00:00
|
|
|
"--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar",
|
|
|
|
"foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple",
|
|
|
|
"https://foo@bar.com/packages/pypi/simple",
|
2024-04-22 11:44:34 +00:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}})
|
2024-04-22 11:44:34 +00:00
|
|
|
|
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
require.NoError(t, diags.Error())
|
|
|
|
|
2025-01-17 09:38:01 +00:00
|
|
|
assert.Equal(t, "./job/dist/env1.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[0])
|
|
|
|
assert.Equal(t, "./dist/env2.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[1])
|
2024-04-22 11:44:34 +00:00
|
|
|
assert.Equal(t, "simplejson", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[2])
|
|
|
|
assert.Equal(t, "/Workspace/Users/foo@bar.com/test.whl", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[3])
|
2024-10-21 11:45:39 +00:00
|
|
|
assert.Equal(t, "--extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple foobar", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[4])
|
|
|
|
assert.Equal(t, "foobar --extra-index-url https://name:token@gitlab.com/api/v4/projects/9876/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[5])
|
|
|
|
assert.Equal(t, "https://foo@bar.com/packages/pypi/simple", b.Config.Resources.Jobs["job"].JobSettings.Environments[0].Spec.Dependencies[6])
|
2024-04-22 11:44:34 +00:00
|
|
|
}
|
2024-07-09 11:12:42 +00:00
|
|
|
|
|
|
|
func TestTranslatePathWithComplexVariables(t *testing.T) {
|
|
|
|
dir := t.TempDir()
|
|
|
|
b := &bundle.Bundle{
|
2024-08-21 15:33:25 +00:00
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
2024-07-09 11:12:42 +00:00
|
|
|
Config: config.Root{
|
|
|
|
Variables: map[string]*variable.Variable{
|
|
|
|
"cluster_libraries": {
|
|
|
|
Type: variable.VariableTypeComplex,
|
|
|
|
Default: [](map[string]string){
|
|
|
|
{
|
|
|
|
"whl": "./local/whl.whl",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
Tasks: []jobs.Task{
|
|
|
|
{
|
|
|
|
TaskKey: "test",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2024-09-25 16:13:48 +00:00
|
|
|
bundletest.SetLocation(b, "variables", []dyn.Location{{File: filepath.Join(dir, "variables/variables.yml")}})
|
|
|
|
bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}})
|
2024-07-09 11:12:42 +00:00
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
// Assign the variables to the dynamic configuration.
|
|
|
|
diags := bundle.ApplyFunc(ctx, b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
|
|
|
|
err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
|
|
|
|
p := dyn.MustPathFromString("resources.jobs.job.tasks[0]")
|
|
|
|
return dyn.SetByPath(v, p.Append(dyn.Key("libraries")), dyn.V("${var.cluster_libraries}"))
|
|
|
|
})
|
|
|
|
return diag.FromErr(err)
|
|
|
|
})
|
|
|
|
require.NoError(t, diags.Error())
|
|
|
|
|
Remove bundle.{Seq,If,Defer,newPhase,logString}, switch to regular functions (#2390)
## Changes
- Instead of constructing chains of mutators and then executing them,
execute them directly.
- Remove functionality related to chain-building: Seq, If, Defer,
newPhase, logString.
- Phases become functions that apply the changes directly rather than
construct mutator chains that will be called later.
- Add a helper ApplySeq to call multiple mutators, use it where
Apply+Seq were used before.
This is intended to be a refactoring without functional changes, but
there are a few behaviour changes:
- Since defer() is used to call unlock instead of bundle.Defer()
unlocking will now happen even in case of panics.
- In --debug, the phase names are are still logged once before start of
the phase but each entry no longer has 'seq' or phase name in it.
- The message "Deployment complete!" was printed even if
terraform.Apply() mutator had an error. It no longer does that.
## Motivation
The use of the chains was necessary when mutators were returning a list
of other mutators instead of calling them directly. But that has since
been removed, so now the chain machinery have no purpose anymore.
Use of direct functions simplifies the logic and makes bugs more
apparent and easy to fix.
Other improvements that this unlocks:
- Simpler stacktraces/debugging (breakpoints).
- Use of functions with narrowly scoped API: instead of mutators that
receive full bundle config, we can use focused functions that only deal
with sections they care about prepareGitSettings(currentGitSection) ->
updatedGitSection. This makes the data flow more apparent.
- Parallel computations across mutators (within phase): launch
goroutines fetching data from APIs at the beggining, process them once
they are ready.
## Tests
Existing tests.
2025-02-27 11:41:58 +00:00
|
|
|
diags = bundle.ApplySeq(ctx, b,
|
|
|
|
mutator.SetVariables(),
|
|
|
|
mutator.ResolveVariableReferences("variables"),
|
|
|
|
mutator.TranslatePaths())
|
2024-07-09 11:12:42 +00:00
|
|
|
require.NoError(t, diags.Error())
|
|
|
|
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"variables/local/whl.whl",
|
2024-07-09 11:12:42 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl,
|
|
|
|
)
|
|
|
|
}
|
2024-11-20 12:22:27 +00:00
|
|
|
|
|
|
|
func TestTranslatePathsWithSourceLinkedDeployment(t *testing.T) {
|
|
|
|
if runtime.GOOS == "windows" {
|
|
|
|
t.Skip("this test is not applicable on Windows because source-linked mode works only in the Databricks Workspace")
|
|
|
|
}
|
|
|
|
|
|
|
|
dir := t.TempDir()
|
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_job_notebook.py"))
|
|
|
|
touchNotebookFile(t, filepath.Join(dir, "my_pipeline_notebook.py"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "my_python_file.py"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar"))
|
|
|
|
touchEmptyFile(t, filepath.Join(dir, "requirements.txt"))
|
|
|
|
|
|
|
|
enabled := true
|
|
|
|
b := &bundle.Bundle{
|
|
|
|
SyncRootPath: dir,
|
|
|
|
SyncRoot: vfs.MustNew(dir),
|
|
|
|
Config: config.Root{
|
|
|
|
Workspace: config.Workspace{
|
|
|
|
FilePath: "/bundle",
|
|
|
|
},
|
|
|
|
Resources: config.Resources{
|
|
|
|
Jobs: map[string]*resources.Job{
|
|
|
|
"job": {
|
|
|
|
JobSettings: &jobs.JobSettings{
|
|
|
|
Tasks: []jobs.Task{
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "my_job_notebook.py",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Whl: "./dist/task.whl"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "/Users/jane.doe@databricks.com/absolute_remote.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
NotebookTask: &jobs.NotebookTask{
|
|
|
|
NotebookPath: "my_job_notebook.py",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Requirements: "requirements.txt"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
SparkPythonTask: &jobs.SparkPythonTask{
|
|
|
|
PythonFile: "my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
SparkJarTask: &jobs.SparkJarTask{
|
|
|
|
MainClassName: "HelloWorld",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Jar: "./dist/task.jar"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
SparkJarTask: &jobs.SparkJarTask{
|
|
|
|
MainClassName: "HelloWorldRemote",
|
|
|
|
},
|
|
|
|
Libraries: []compute.Library{
|
|
|
|
{Jar: "dbfs:/bundle/dist/task_remote.jar"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Pipelines: map[string]*resources.Pipeline{
|
|
|
|
"pipeline": {
|
2025-02-07 17:22:51 +00:00
|
|
|
CreatePipeline: &pipelines.CreatePipeline{
|
2024-11-20 12:22:27 +00:00
|
|
|
Libraries: []pipelines.PipelineLibrary{
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "my_pipeline_notebook.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Notebook: &pipelines.NotebookLibrary{
|
|
|
|
Path: "/Users/jane.doe@databricks.com/absolute_remote.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
File: &pipelines.FileLibrary{
|
|
|
|
Path: "my_python_file.py",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Presets: config.Presets{
|
|
|
|
SourceLinkedDeployment: &enabled,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}})
|
|
|
|
diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths())
|
|
|
|
require.NoError(t, diags.Error())
|
|
|
|
|
|
|
|
// updated to source path
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
dir+"/my_job_notebook",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].NotebookTask.NotebookPath,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
dir+"/requirements.txt",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[2].Libraries[0].Requirements,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
dir+"/my_python_file.py",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[3].SparkPythonTask.PythonFile,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
dir+"/my_pipeline_notebook",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[0].Notebook.Path,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
dir+"/my_python_file.py",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[2].File.Path,
|
|
|
|
)
|
|
|
|
|
|
|
|
// left as is
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"dist/task.whl",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[0].Libraries[0].Whl,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/Users/jane.doe@databricks.com/absolute_remote.py",
|
|
|
|
b.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
2025-01-17 09:38:01 +00:00
|
|
|
"dist/task.jar",
|
2024-11-20 12:22:27 +00:00
|
|
|
b.Config.Resources.Jobs["job"].Tasks[4].Libraries[0].Jar,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"dbfs:/bundle/dist/task_remote.jar",
|
|
|
|
b.Config.Resources.Jobs["job"].Tasks[5].Libraries[0].Jar,
|
|
|
|
)
|
|
|
|
assert.Equal(
|
|
|
|
t,
|
|
|
|
"/Users/jane.doe@databricks.com/absolute_remote.py",
|
|
|
|
b.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path,
|
|
|
|
)
|
|
|
|
}
|