databricks-cli/bundle/config/root_test.go

122 lines
3.3 KiB
Go
Raw Permalink Normal View History

package config
import (
"encoding/json"
"reflect"
"testing"
"github.com/databricks/cli/bundle/config/variable"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRootMarshalUnmarshal checks that a zero-value Root is unchanged after
// being encoded to JSON and decoded again.
func TestRootMarshalUnmarshal(t *testing.T) {
	// Serialize the zero value.
	encoded, err := json.Marshal(&Root{})
	require.NoError(t, err)

	// Decode the bytes back into a fresh value.
	decoded := Root{}
	require.NoError(t, json.Unmarshal(encoded, &decoded))

	// The decoded value must be deeply equal to the zero value.
	same := reflect.DeepEqual(Root{}, decoded)
	assert.True(t, same)
}
// TestRootLoad loads the basic fixture and expects the resulting bundle name
// to be "basic".
func TestRootLoad(t *testing.T) {
	const fixture = "../tests/basic/databricks.yml"

	loaded, err := Load(fixture)
	require.NoError(t, err)
	assert.Equal(t, "basic", loaded.Bundle.Name)
}
// TestDuplicateIdOnLoadReturnsError expects Load to report an error naming
// both definitions when a single file declares two resources called "foo".
func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
	const wantErr = "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)"

	_, loadErr := Load("./testdata/duplicate_resource_names_in_root/databricks.yml")
	assert.ErrorContains(t, loadErr, wantErr)
}
// TestDuplicateIdOnMergeReturnsError loads two configuration files
// individually (each must load cleanly) and expects Merge to report an error
// naming both files as definitions of the resource "foo".
func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
	const wantErr = "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)"

	base, err := Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml")
	require.NoError(t, err)

	sub, err := Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml")
	require.NoError(t, err)

	// The conflict only surfaces once the two configurations are combined.
	err = base.Merge(sub)
	assert.ErrorContains(t, err, wantErr)
}
// TestInitializeVariables assigns values to two declared variables, one with
// a default and one without, and expects each assigned value to be readable
// through the variable's Value pointer.
func TestInitializeVariables(t *testing.T) {
	fooDefault := "abc"
	root := &Root{
		Variables: map[string]*variable.Variable{
			"foo": {
				Default:     &fooDefault,
				Description: "an optional variable since default is defined",
			},
			"bar": {
				Description: "a required variable",
			},
		},
	}

	err := root.InitializeVariables([]string{"foo=123", "bar=456"})
	// Abort on failure: the assertions below dereference Value, which would
	// panic on a nil pointer instead of producing a readable test failure.
	require.NoError(t, err)

	require.NotNil(t, root.Variables["foo"].Value)
	require.NotNil(t, root.Variables["bar"].Value)
	assert.Equal(t, "123", *(root.Variables["foo"].Value))
	assert.Equal(t, "456", *(root.Variables["bar"].Value))
}
// TestInitializeVariablesWithAnEqualSignInValue expects an assignment whose
// value itself contains "=" to be split on the first "=" only, so that
// "foo=123=567" yields the value "123=567".
func TestInitializeVariablesWithAnEqualSignInValue(t *testing.T) {
	root := &Root{
		Variables: map[string]*variable.Variable{
			"foo": {
				Description: "a variable called foo",
			},
		},
	}

	err := root.InitializeVariables([]string{"foo=123=567"})
	// Abort on failure: dereferencing a nil Value below would panic rather
	// than report a readable test failure.
	require.NoError(t, err)

	require.NotNil(t, root.Variables["foo"].Value)
	assert.Equal(t, "123=567", *(root.Variables["foo"].Value))
}
func TestInitializeVariablesInvalidFormat(t *testing.T) {
root := &Root{
Variables: map[string]*variable.Variable{
"foo": {
Description: "a variable called foo",
},
},
}
err := root.InitializeVariables([]string{"foo"})
assert.ErrorContains(t, err, "unexpected flag value for variable assignment: foo")
}
func TestInitializeVariablesUndefinedVariables(t *testing.T) {
root := &Root{
Variables: map[string]*variable.Variable{
"foo": {
Description: "A required variable",
},
},
}
err := root.InitializeVariables([]string{"bar=567"})
assert.ErrorContains(t, err, "variable bar has not been defined")
}
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: development`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. 
The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is that `run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
func TestRootMergeTargetOverridesWithMode(t *testing.T) {
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: debug`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. 
The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is`run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
root := &Root{
Bundle: Bundle{},
Use dynamic configuration model in bundles (#1098) ## Changes This is a fundamental change to how we load and process bundle configuration. We now depend on the configuration being represented as a `dyn.Value`. This representation is functionally equivalent to Go's `any` (it is variadic) and allows us to capture metadata associated with a value, such as where it was defined (e.g. file, line, and column). It also allows us to represent Go's zero values properly (e.g. empty string, integer equal to 0, or boolean false). Using this representation allows us to let the configuration model deviate from the typed structure we have been relying on so far (`config.Root`). We need to deviate from these types when using variables for fields that are not a string themselves. For example, using `${var.num_workers}` for an integer `workers` field was impossible until now (though not implemented in this change). The loader for a `dyn.Value` includes functionality to capture any and all type mismatches between the user-defined configuration and the expected types. These mismatches can be surfaced as validation errors in future PRs. Given that many mutators expect the typed struct to be the source of truth, this change converts between the dynamic representation and the typed representation on mutator entry and exit. Existing mutators can continue to modify the typed representation and these modifications are reflected in the dynamic representation (see `MarkMutatorEntry` and `MarkMutatorExit` in `bundle/config/root.go`). Required changes included in this change: * The existing interpolation package is removed in favor of `libs/dyn/dynvar`. * Functionality to merge job clusters, job tasks, and pipeline clusters are now all broken out into their own mutators. To be implemented later: * Allow variable references for non-string types. * Surface diagnostics about the configuration provided by the user in the validation output. 
* Some mutators use a resource's configuration file path to resolve related relative paths. These depend on `bundle/config/paths.Path` being set and populated through `ConfigureConfigFilePath`. Instead, they should interact with the dynamically typed configuration directly. Doing this also unlocks being able to differentiate different base paths used within a job (e.g. a task override with a relative path defined in a directory other than the base job). ## Tests * Existing unit tests pass (some have been modified to accommodate) * Integration tests pass
2024-02-16 19:41:58 +00:00
Targets: map[string]*Target{
"development": {
Mode: Development,
},
},
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: debug`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. 
The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is`run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
}
Use dynamic configuration model in bundles (#1098) ## Changes This is a fundamental change to how we load and process bundle configuration. We now depend on the configuration being represented as a `dyn.Value`. This representation is functionally equivalent to Go's `any` (it is variadic) and allows us to capture metadata associated with a value, such as where it was defined (e.g. file, line, and column). It also allows us to represent Go's zero values properly (e.g. empty string, integer equal to 0, or boolean false). Using this representation allows us to let the configuration model deviate from the typed structure we have been relying on so far (`config.Root`). We need to deviate from these types when using variables for fields that are not a string themselves. For example, using `${var.num_workers}` for an integer `workers` field was impossible until now (though not implemented in this change). The loader for a `dyn.Value` includes functionality to capture any and all type mismatches between the user-defined configuration and the expected types. These mismatches can be surfaced as validation errors in future PRs. Given that many mutators expect the typed struct to be the source of truth, this change converts between the dynamic representation and the typed representation on mutator entry and exit. Existing mutators can continue to modify the typed representation and these modifications are reflected in the dynamic representation (see `MarkMutatorEntry` and `MarkMutatorExit` in `bundle/config/root.go`). Required changes included in this change: * The existing interpolation package is removed in favor of `libs/dyn/dynvar`. * Functionality to merge job clusters, job tasks, and pipeline clusters are now all broken out into their own mutators. To be implemented later: * Allow variable references for non-string types. * Surface diagnostics about the configuration provided by the user in the validation output. 
* Some mutators use a resource's configuration file path to resolve related relative paths. These depend on `bundle/config/paths.Path` being set and populated through `ConfigureConfigFilePath`. Instead, they should interact with the dynamically typed configuration directly. Doing this also unlocks being able to differentiate different base paths used within a job (e.g. a task override with a relative path defined in a directory other than the base job). ## Tests * Existing unit tests pass (some have been modified to accommodate) * Integration tests pass
2024-02-16 19:41:58 +00:00
root.initializeDynamicValue()
require.NoError(t, root.MergeTargetOverrides("development"))
Add development runs (#522) This implements the "development run" functionality that we desire for DABs in the workspace / IDE. ## bundle.yml changes In bundle.yml, there should be a "dev" environment that is marked as `mode: debug`: ``` environments: dev: default: true mode: development # future accepted values might include pull_request, production ``` Setting `mode` to `development` indicates that this environment is used just for running things for development. This results in several changes to deployed assets: * All assets will get '[dev]' in their name and will get a 'dev' tag * All assets will be hidden from the list of assets (future work; e.g. for jobs we would have a special job_type that hides it from the list) * All deployed assets will be ephemeral (future work, we need some form of garbage collection) * Pipelines will be marked as 'development: true' * Jobs can run on development compute through the `--compute` parameter in the CLI * Jobs get their schedule / triggers paused * Jobs get concurrent runs (it's really annoying if your runs get skipped because the last run was still in progress) Other accepted values for `mode` are `default` (which does nothing) and `pull-request` (which is reserved for future use). ## CLI changes To run a single job called "shark_sighting" on existing compute, use the following commands: ``` $ databricks bundle deploy --compute 0617-201942-9yd9g8ix $ databricks bundle run shark_sighting ``` which would deploy and run a job called "[dev] shark_sightings" on the compute provided. Note that `--compute` is not accepted in production environments, so we show an error if `mode: development` is not used. 
The `run --deploy` command offers a convenient shorthand for the common combination of deploying & running: ``` $ export DATABRICKS_COMPUTE=0617-201942-9yd9g8ix $ bundle run --deploy shark_sightings ``` The `--deploy` addition isn't really essential and I welcome feedback 🤔 I played with the idea of a "debug" or "dev" command but that seemed to only make the option space even broader for users. The above could work well with an IDE or workspace that automatically sets the target compute. One more thing I added is`run --no-wait` can now be used to run something without waiting for it to be completed (useful for IDE-like environments that can display progress themselves). ``` $ bundle run --deploy shark_sightings --no-wait ```
2023-07-12 06:51:54 +00:00
assert.Equal(t, Development, root.Bundle.Mode)
}