diff --git a/bundle/config/interpolation/interpolation.go b/bundle/config/interpolation/interpolation.go index 8eab03b4..854a0d2d 100644 --- a/bundle/config/interpolation/interpolation.go +++ b/bundle/config/interpolation/interpolation.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/bundle/config/variable" "golang.org/x/exp/maps" "golang.org/x/exp/slices" ) @@ -127,6 +128,13 @@ func (a *accumulator) walk(scope []string, rv reflect.Value, s setter) { case reflect.String: path := strings.Join(scope, Delimiter) a.strings[path] = newStringField(path, anyGetter{rv}, s) + + // register alias for variable value. `var.foo` would be the alias for + // `variables.foo.value` + if len(scope) == 3 && scope[0] == "variables" && scope[2] == "value" { + aliasPath := strings.Join([]string{variable.VariableReferencePrefix, scope[1]}, Delimiter) + a.strings[aliasPath] = a.strings[path] + } case reflect.Struct: a.walkStruct(scope, rv) case reflect.Map: @@ -174,7 +182,7 @@ func (a *accumulator) Resolve(path string, seenPaths []string, fns ...LookupFunc // fetch the string node to resolve field, ok := a.strings[path] if !ok { - return fmt.Errorf("could not find string field with path %s", path) + return fmt.Errorf("could not resolve reference %s", path) } // return early if the string field has no variables to interpolate diff --git a/bundle/config/interpolation/interpolation_test.go b/bundle/config/interpolation/interpolation_test.go index eb5848fd..380fcffe 100644 --- a/bundle/config/interpolation/interpolation_test.go +++ b/bundle/config/interpolation/interpolation_test.go @@ -3,6 +3,8 @@ package interpolation import ( "testing" + "github.com/databricks/bricks/bundle/config" + "github.com/databricks/bricks/bundle/config/variable" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -125,3 +127,70 @@ func TestInterpolationVariableLoopError(t *testing.T) { err := expand(&f) assert.ErrorContains(t, err, "cycle detected in field resolution: b -> c -> d -> b") } + +func TestInterpolationForVariables(t *testing.T) { + foo := "abc" + bar := "${var.foo} def" + apple := "${var.foo} ${var.bar}" + config := config.Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Value: &foo, + }, + "bar": { + Value: &bar, + }, + "apple": { + Value: &apple, + }, + }, + Bundle: config.Bundle{ + Name: "${var.apple} ${var.foo}", + }, + } + + err := expand(&config) + assert.NoError(t, err) + assert.Equal(t, "abc", *(config.Variables["foo"].Value)) + assert.Equal(t, "abc def", *(config.Variables["bar"].Value)) + assert.Equal(t, "abc abc def", *(config.Variables["apple"].Value)) + assert.Equal(t, "abc abc def abc", config.Bundle.Name) +} + +func TestInterpolationLoopForVariables(t *testing.T) { + foo := "${var.bar}" + bar := "${var.foo}" + config := config.Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Value: &foo, + }, + "bar": { + Value: &bar, + }, + }, + Bundle: config.Bundle{ + Name: "${var.foo}", + }, + } + + err := expand(&config) + assert.ErrorContains(t, err, "cycle detected in field resolution: bundle.name -> var.foo -> var.bar -> var.foo") +} + +func TestInterpolationInvalidVariableReference(t *testing.T) { + foo := "abc" + config := config.Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Value: &foo, + }, + }, + Bundle: config.Bundle{ + Name: "${vars.foo}", + }, + } + + err := expand(&config) + assert.ErrorContains(t, err, "could not resolve reference vars.foo") +} diff --git a/bundle/config/mutator/set_variables.go b/bundle/config/mutator/set_variables.go new file mode 100644 index 00000000..397164ef --- /dev/null +++ b/bundle/config/mutator/set_variables.go @@ -0,0 +1,63 @@ +package mutator + +import ( + "context" + "fmt" + "os" + + "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/bundle/config/variable" +) + +const bundleVarPrefix = "BUNDLE_VAR_" + +type setVariables struct{} + +func SetVariables() bundle.Mutator { + return &setVariables{} +} + +func (m *setVariables) Name() string { + return "SetVariables" +} + +func setVariable(v *variable.Variable, name string) error { + // case: variable already has value initialized, so skip + if v.HasValue() { + return nil + } + + // case: read and set variable value from process environment + envVarName := bundleVarPrefix + name + if val, ok := os.LookupEnv(envVarName); ok { + err := v.Set(val) + if err != nil { + return fmt.Errorf(`failed to assign value "%s" to variable %s from environment variable %s with error: %w`, val, name, envVarName, err) + } + return nil + } + + // case: Set the variable to its default value + if v.HasDefault() { + err := v.Set(*v.Default) + if err != nil { + return fmt.Errorf(`failed to assign default value from config "%s" to variable %s with error: %w`, *v.Default, name, err) + } + return nil + } + + // We should have had a value to set for the variable at this point. + // TODO: use cmdio to request values for unassigned variables if current + // terminal is a tty. Tracked in https://github.com/databricks/bricks/issues/379 + return fmt.Errorf(`no value assigned to required variable %s. Assignment can be done through the "--var" flag or by setting the %s environment variable`, name, bundleVarPrefix+name) +} + +func (m *setVariables) Apply(ctx context.Context, b *bundle.Bundle) ([]bundle.Mutator, error) { + for name, variable := range b.Config.Variables { + err := setVariable(variable, name) + if err != nil { + return nil, err + } + } + return nil, nil +} diff --git a/bundle/config/mutator/set_variables_test.go b/bundle/config/mutator/set_variables_test.go new file mode 100644 index 00000000..b1fe3a6c --- /dev/null +++ b/bundle/config/mutator/set_variables_test.go @@ -0,0 +1,116 @@ +package mutator + +import ( + "context" + "testing" + + "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/bundle/config" + "github.com/databricks/bricks/bundle/config/variable" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSetVariableFromProcessEnvVar(t *testing.T) { + defaultVal := "default" + variable := variable.Variable{ + Description: "a test variable", + Default: &defaultVal, + } + + // set value for variable as an environment variable + t.Setenv("BUNDLE_VAR_foo", "process-env") + + err := setVariable(&variable, "foo") + require.NoError(t, err) + assert.Equal(t, *variable.Value, "process-env") +} + +func TestSetVariableUsingDefaultValue(t *testing.T) { + defaultVal := "default" + variable := variable.Variable{ + Description: "a test variable", + Default: &defaultVal, + } + + err := setVariable(&variable, "foo") + require.NoError(t, err) + assert.Equal(t, *variable.Value, "default") +} + +func TestSetVariableWhenAlreadyAValueIsAssigned(t *testing.T) { + defaultVal := "default" + val := "assigned-value" + variable := variable.Variable{ + Description: "a test variable", + Default: &defaultVal, + Value: &val, + } + + // since a value is already assigned to the variable, it would not be overridden + // by the default value + err := setVariable(&variable, "foo") + require.NoError(t, err) + assert.Equal(t, *variable.Value, "assigned-value") +} + +func TestSetVariableEnvVarValueDoesNotOverridePresetValue(t *testing.T) { + defaultVal := "default" + val := "assigned-value" + variable := variable.Variable{ + Description: "a test variable", + Default: &defaultVal, + Value: &val, + } + + // set value for variable as an environment variable + t.Setenv("BUNDLE_VAR_foo", "process-env") + + // since a value is already assigned to the variable, it would not be overridden + // by the value from environment + err := setVariable(&variable, "foo") + require.NoError(t, err) + assert.Equal(t, *variable.Value, "assigned-value") +} + +func TestSetVariablesErrorsIfAValueCouldNotBeResolved(t *testing.T) { + variable := variable.Variable{ + Description: "a test variable with no default", + } + + // fails because we could not resolve a value for the variable + err := setVariable(&variable, "foo") + assert.ErrorContains(t, err, "no value assigned to required variable foo. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_foo environment variable") +} + +func TestSetVariablesMutator(t *testing.T) { + defaultValForA := "default-a" + defaultValForB := "default-b" + valForC := "assigned-val-c" + bundle := &bundle.Bundle{ + Config: config.Root{ + Variables: map[string]*variable.Variable{ + "a": { + Description: "resolved to default value", + Default: &defaultValForA, + }, + "b": { + Description: "resolved from environment vairables", + Default: &defaultValForB, + }, + "c": { + Description: "has already been assigned a value", + Value: &valForC, + }, + }, + }, + } + + t.Setenv("BUNDLE_VAR_b", "env-var-b") + + _, err := SetVariables().Apply(context.Background(), bundle) + require.NoError(t, err) + assert.Equal(t, "default-a", *bundle.Config.Variables["a"].Value) + assert.Equal(t, "env-var-b", *bundle.Config.Variables["b"].Value) + assert.Equal(t, "assigned-val-c", *bundle.Config.Variables["c"].Value) +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 0e7b89e1..e67c8c0a 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -1,9 +1,12 @@ package config import ( + "fmt" "os" "path/filepath" + "strings" + "github.com/databricks/bricks/bundle/config/variable" "github.com/ghodss/yaml" "github.com/imdario/mergo" ) @@ -16,6 +19,9 @@ type Root struct { // It is set when loading `bundle.yml`. Path string `json:"-" bundle:"readonly"` + // Contains user defined variables + Variables map[string]*variable.Variable `json:"variables,omitempty"` + // Bundle contains details about this bundle, such as its name, // version of the spec (TODO), default cluster, default warehouse, etc. Bundle Bundle `json:"bundle"` @@ -79,6 +85,29 @@ func (r *Root) SetConfigFilePath(path string) { } } +// Initializes variables using values passed from the command line flag +// Input has to be a string of the form `foo=bar`. In this case the variable with +// name `foo` is assigned the value `bar` +func (r *Root) InitializeVariables(vars []string) error { + for _, variable := range vars { + parsedVariable := strings.SplitN(variable, "=", 2) + if len(parsedVariable) != 2 { + return fmt.Errorf("unexpected flag value for variable assignment: %s", variable) + } + name := parsedVariable[0] + val := parsedVariable[1] + + if _, ok := r.Variables[name]; !ok { + return fmt.Errorf("variable %s has not been defined", name) + } + err := r.Variables[name].Set(val) + if err != nil { + return fmt.Errorf("failed to assign %s to %s: %s", val, name, err) + } + } + return nil +} + func (r *Root) Load(path string) error { raw, err := os.ReadFile(path) if err != nil { diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index 7dd13181..5d8240ce 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -5,6 +5,7 @@ import ( "reflect" "testing" + "github.com/databricks/bricks/bundle/config/variable" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -93,3 +94,63 @@ func TestDuplicateIdOnMergeReturnsError(t *testing.T) { err = root.Merge(other) assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)") } + +func TestInitializeVariables(t *testing.T) { + fooDefault := "abc" + root := &Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Default: &fooDefault, + Description: "an optional variable since default is defined", + }, + "bar": { + Description: "a required variable", + }, + }, + } + + err := root.InitializeVariables([]string{"foo=123", "bar=456"}) + assert.NoError(t, err) + assert.Equal(t, "123", *(root.Variables["foo"].Value)) + assert.Equal(t, "456", *(root.Variables["bar"].Value)) +} + +func TestInitializeVariablesWithAnEqualSignInValue(t *testing.T) { + root := &Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Description: "a variable called foo", + }, + }, + } + + err := root.InitializeVariables([]string{"foo=123=567"}) + assert.NoError(t, err) + assert.Equal(t, "123=567", *(root.Variables["foo"].Value)) +} + +func TestInitializeVariablesInvalidFormat(t *testing.T) { + root := &Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Description: "a variable called foo", + }, + }, + } + + err := root.InitializeVariables([]string{"foo"}) + assert.ErrorContains(t, err, "unexpected flag value for variable assignment: foo") +} + +func TestInitializeVariablesUndefinedVariables(t *testing.T) { + root := &Root{ + Variables: map[string]*variable.Variable{ + "foo": { + Description: "A required variable", + }, + }, + } + + err := root.InitializeVariables([]string{"bar=567"}) + assert.ErrorContains(t, err, "variable bar has not been defined") +} diff --git a/bundle/config/variable/variable.go b/bundle/config/variable/variable.go new file mode 100644 index 00000000..68499ee3 --- /dev/null +++ b/bundle/config/variable/variable.go @@ -0,0 +1,45 @@ +package variable + +import ( + "fmt" +) + +const VariableReferencePrefix = "var" + +// An input variable for the bundle config +type Variable struct { + // A default value which then makes the variable optional + Default *string `json:"default,omitempty"` + + // Documentation for this input variable + Description string `json:"description,omitempty"` + + // This field stores the resolved value for the variable. The variable are + // resolved in the following priority order (from highest to lowest) + // + // 1. Command line flag. For example: `--var="foo=bar"` + // 2. Environment variable. eg: BUNDLE_VAR_foo=bar + // 3. default value defined in bundle config + // 4. Throw error, since if no default value is defined, then the variable + // is required + Value *string `json:"value,omitempty" bundle:"readonly"` +} + +// True if the variable has been assigned a default value. Variables without a +// a default value are by defination required +func (v *Variable) HasDefault() bool { + return v.Default != nil +} + +// True if variable has already been assigned a value +func (v *Variable) HasValue() bool { + return v.Value != nil +} + +func (v *Variable) Set(val string) error { + if v.HasValue() { + return fmt.Errorf("variable has already been assigned value: %s", *v.Value) + } + v.Value = &val + return nil +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 176cacf7..e4053660 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -4,6 +4,7 @@ import ( "github.com/databricks/bricks/bundle" "github.com/databricks/bricks/bundle/config/interpolation" "github.com/databricks/bricks/bundle/config/mutator" + "github.com/databricks/bricks/bundle/config/variable" "github.com/databricks/bricks/bundle/deploy/terraform" ) @@ -18,9 +19,11 @@ func Initialize() bundle.Mutator { mutator.DefineDefaultWorkspaceRoot(), mutator.ExpandWorkspaceRoot(), mutator.DefineDefaultWorkspacePaths(), + mutator.SetVariables(), interpolation.Interpolate( interpolation.IncludeLookupsInPath("bundle"), interpolation.IncludeLookupsInPath("workspace"), + interpolation.IncludeLookupsInPath(variable.VariableReferencePrefix), ), mutator.TranslatePaths(), terraform.Initialize(), diff --git a/bundle/tests/variables/bundle.yml b/bundle/tests/variables/bundle.yml new file mode 100644 index 00000000..d80c4c05 --- /dev/null +++ b/bundle/tests/variables/bundle.yml @@ -0,0 +1,10 @@ +variables: + a: + description: optional variable + default: abc + + b: + description: required variable + +bundle: + name: ${var.a} ${var.b} diff --git a/bundle/tests/variables_test.go b/bundle/tests/variables_test.go new file mode 100644 index 00000000..6ddbc5d7 --- /dev/null +++ b/bundle/tests/variables_test.go @@ -0,0 +1,35 @@ +package config_tests + +import ( + "context" + "testing" + + "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/bundle/config/interpolation" + "github.com/databricks/bricks/bundle/config/mutator" + "github.com/databricks/bricks/bundle/config/variable" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestVariables(t *testing.T) { + t.Setenv("BUNDLE_VAR_b", "def") + b := load(t, "./variables") + err := bundle.Apply(context.Background(), b, []bundle.Mutator{ + mutator.SetVariables(), + interpolation.Interpolate( + interpolation.IncludeLookupsInPath(variable.VariableReferencePrefix), + )}) + require.NoError(t, err) + assert.Equal(t, "abc def", b.Config.Bundle.Name) +} + +func TestVariablesLoadingFailsWhenRequiredVariableIsNotSpecified(t *testing.T) { + b := load(t, "./variables") + err := bundle.Apply(context.Background(), b, []bundle.Mutator{ + mutator.SetVariables(), + interpolation.Interpolate( + interpolation.IncludeLookupsInPath(variable.VariableReferencePrefix), + )}) + assert.ErrorContains(t, err, "no value assigned to required variable b. Assignment can be done through the \"--var\" flag or by setting the BUNDLE_VAR_b environment variable") +} diff --git a/cmd/bundle/debug/whoami.go b/cmd/bundle/debug/whoami.go index b3401261..a082adac 100644 --- a/cmd/bundle/debug/whoami.go +++ b/cmd/bundle/debug/whoami.go @@ -4,14 +4,14 @@ import ( "fmt" "github.com/databricks/bricks/bundle" - "github.com/databricks/bricks/cmd/root" + bundleCmd "github.com/databricks/bricks/cmd/bundle" "github.com/spf13/cobra" ) var whoamiCmd = &cobra.Command{ Use: "whoami", - PreRunE: root.MustConfigureBundle, + PreRunE: bundleCmd.ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() w := bundle.Get(ctx).WorkspaceClient() diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index c96da816..5c31b045 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -3,7 +3,6 @@ package bundle import ( "github.com/databricks/bricks/bundle" "github.com/databricks/bricks/bundle/phases" - "github.com/databricks/bricks/cmd/root" "github.com/spf13/cobra" ) @@ -11,7 +10,7 @@ var deployCmd = &cobra.Command{ Use: "deploy", Short: "Deploy bundle", - PreRunE: root.MustConfigureBundle, + PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { b := bundle.Get(cmd.Context()) diff --git a/cmd/bundle/destroy.go b/cmd/bundle/destroy.go index ce018d80..2a1e4fb9 100644 --- a/cmd/bundle/destroy.go +++ b/cmd/bundle/destroy.go @@ -6,7 +6,6 @@ import ( "github.com/databricks/bricks/bundle" "github.com/databricks/bricks/bundle/phases" - "github.com/databricks/bricks/cmd/root" "github.com/databricks/bricks/libs/cmdio" "github.com/databricks/bricks/libs/flags" "github.com/spf13/cobra" @@ -17,7 +16,7 @@ var destroyCmd = &cobra.Command{ Use: "destroy", Short: "Destroy deployed bundle resources", - PreRunE: root.MustConfigureBundle, + PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() b := bundle.Get(ctx) diff --git a/cmd/bundle/root.go b/cmd/bundle/root.go index a21e3da0..a1722263 100644 --- a/cmd/bundle/root.go +++ b/cmd/bundle/root.go @@ -15,6 +15,9 @@ func AddCommand(cmd *cobra.Command) { rootCmd.AddCommand(cmd) } +var variables []string + func init() { root.RootCmd.AddCommand(rootCmd) + AddVariableFlag(rootCmd) } diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 620f2b68..0724d84b 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -20,7 +20,7 @@ var runCmd = &cobra.Command{ Short: "Run a workload (e.g. a job or a pipeline)", Args: cobra.ExactArgs(1), - PreRunE: root.MustConfigureBundle, + PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { b := bundle.Get(cmd.Context()) err := bundle.Apply(cmd.Context(), b, []bundle.Mutator{ diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index 6888cd4c..e6d9812b 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -6,7 +6,6 @@ import ( "github.com/databricks/bricks/bundle" "github.com/databricks/bricks/bundle/phases" - "github.com/databricks/bricks/cmd/root" "github.com/databricks/bricks/libs/log" "github.com/databricks/bricks/libs/sync" "github.com/spf13/cobra" @@ -35,7 +34,7 @@ var syncCmd = &cobra.Command{ Short: "Synchronize bundle tree to the workspace", Args: cobra.NoArgs, - PreRunE: root.MustConfigureBundle, + PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { b := bundle.Get(cmd.Context()) diff --git a/cmd/bundle/validate.go b/cmd/bundle/validate.go index 58a7d58f..a55ff713 100644 --- a/cmd/bundle/validate.go +++ b/cmd/bundle/validate.go @@ -5,7 +5,6 @@ import ( "github.com/databricks/bricks/bundle" "github.com/databricks/bricks/bundle/phases" - "github.com/databricks/bricks/cmd/root" "github.com/spf13/cobra" ) @@ -13,9 +12,10 @@ var validateCmd = &cobra.Command{ Use: "validate", Short: "Validate configuration", - PreRunE: root.MustConfigureBundle, + PreRunE: ConfigureBundleWithVariables, RunE: func(cmd *cobra.Command, args []string) error { b := bundle.Get(cmd.Context()) + err := bundle.Apply(cmd.Context(), b, []bundle.Mutator{ phases.Initialize(), }) diff --git a/cmd/bundle/variables.go b/cmd/bundle/variables.go new file mode 100644 index 00000000..db87fd3e --- /dev/null +++ b/cmd/bundle/variables.go @@ -0,0 +1,23 @@ +package bundle + +import ( + "github.com/databricks/bricks/bundle" + "github.com/databricks/bricks/cmd/root" + "github.com/spf13/cobra" +) + +func ConfigureBundleWithVariables(cmd *cobra.Command, args []string) error { + // Load bundle config and apply environment + err := root.MustConfigureBundle(cmd, args) + if err != nil { + return err + } + + // Initialize variables by assigning them values passed as command line flags + b := bundle.Get(cmd.Context()) + return b.Config.InitializeVariables(variables) +} + +func AddVariableFlag(cmd *cobra.Command) { + cmd.PersistentFlags().StringSliceVar(&variables, "var", []string{}, `set values for variables defined in bundle config. Example: --var="foo=bar"`) +}