From 72030844c52f9499b10e7158544a80fde56451f3 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Wed, 4 Sep 2024 19:16:40 +0200
Subject: [PATCH 1/3] Fixed variable override in target with full variable
 syntax (#1749)

## Changes

This PR makes sure that both of these override syntaxes for variables work
correctly:

```
targets:
  dev:
    variables:
      cluster1:
        spark_version: "14.2.x-scala2.11"
        node_type_id: "Standard_DS3_v2"
        num_workers: 4
        spark_conf:
          spark.speculation: false
          spark.databricks.delta.retentionDurationCheck.enabled: false
      cluster2:
        default:
          spark_version: "14.2.x-scala2.11"
          node_type_id: "Standard_DS3_v2"
          num_workers: 4
          spark_conf:
            spark.speculation: false
            spark.databricks.delta.retentionDurationCheck.enabled: false
```

## Tests

Added regression test

---------

Co-authored-by: Pieter Noordhuis
---
 bundle/config/root.go                      | 43 ++++++++++++++-----
 bundle/tests/complex_variables_test.go     | 11 ++---
 .../complex_multiple_files/databricks.yml  | 43 +++++++++++++++++--
 .../variables/clusters.yml                 | 10 ++++-
 4 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/bundle/config/root.go b/bundle/config/root.go
index 281c5c2a..46578769 100644
--- a/bundle/config/root.go
+++ b/bundle/config/root.go
@@ -406,6 +406,30 @@ func (r *Root) MergeTargetOverrides(name string) error {
 	return r.updateWithDynamicValue(root)
 }
 
+var variableKeywords = []string{"default", "lookup"}
+
+// isFullVariableOverrideDef checks if the given value is a full syntax variable override.
+// A full syntax variable override is a map with only one of the following
+// keys: "default", "lookup".
+func isFullVariableOverrideDef(v dyn.Value) bool {
+	mv, ok := v.AsMap()
+	if !ok {
+		return false
+	}
+
+	if mv.Len() != 1 {
+		return false
+	}
+
+	for _, keyword := range variableKeywords {
+		if _, ok := mv.GetByString(keyword); ok {
+			return true
+		}
+	}
+
+	return false
+}
+
 // rewriteShorthands performs lightweight rewriting of the configuration
 // tree where we allow users to write a shorthand and must rewrite to the full form.
 func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
@@ -433,30 +457,27 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) {
 		}, variable.Locations()), nil
 
 	case dyn.KindMap, dyn.KindSequence:
-		lookup, err := dyn.Get(variable, "lookup")
-		// If lookup is set, we don't want to rewrite the variable and return it as is.
-		if err == nil && lookup.Kind() != dyn.KindInvalid {
+		// If it's a full variable definition, leave it as is.
+		if isFullVariableOverrideDef(variable) {
 			return variable, nil
 		}
 
 		// Check if the original definition of variable has a type field.
+		// If it has a type field, it means the shorthand is a value of a complex type.
 		// Type might not be found if the variable is overridden in a separate file
 		// and the configuration is not merged yet.
 		typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type")))
-		if err != nil {
-			return dyn.NewValue(map[string]dyn.Value{
-				"default": variable,
-			}, variable.Locations()), nil
-		}
-
-		if typeV.MustString() == "complex" {
+		if err == nil && typeV.MustString() == "complex" {
 			return dyn.NewValue(map[string]dyn.Value{
 				"type":    typeV,
 				"default": variable,
 			}, variable.Locations()), nil
 		}
 
-		return variable, nil
+		// If it's a shorthand, rewrite it to a full variable definition.
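+		// For example, a target override like `cluster1: {spark_version: ...}`
+		// is rewritten to `cluster1: {default: {spark_version: ...}}`.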
+		return dyn.NewValue(map[string]dyn.Value{
+			"default": variable,
+		}, variable.Locations()), nil
 
 	default:
 		return variable, nil

diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go
index d46d8d8c..6371071c 100644
--- a/bundle/tests/complex_variables_test.go
+++ b/bundle/tests/complex_variables_test.go
@@ -81,9 +81,10 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) {
 		),
 	))
 	require.NoError(t, diags.Error())
-
-	require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion)
-	require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId)
-	require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers)
-	require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"])
+	for _, cluster := range b.Config.Resources.Jobs["my_job"].JobClusters {
+		require.Equalf(t, "14.2.x-scala2.11", cluster.NewCluster.SparkVersion, "cluster: %v", cluster.JobClusterKey)
+		require.Equalf(t, "Standard_DS3_v2", cluster.NewCluster.NodeTypeId, "cluster: %v", cluster.JobClusterKey)
+		require.Equalf(t, 4, cluster.NewCluster.NumWorkers, "cluster: %v", cluster.JobClusterKey)
+		require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey)
+	}
 }

diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml
index cb5d2439..42a82c61 100644
--- a/bundle/tests/variables/complex_multiple_files/databricks.yml
+++ b/bundle/tests/variables/complex_multiple_files/databricks.yml
@@ -5,13 +5,48 @@ resources:
   jobs:
     my_job:
       job_clusters:
-        - job_cluster_key: key
-          new_cluster: ${var.cluster}
-
+        - job_cluster_key: key1
+          new_cluster: ${var.cluster1}
+        - job_cluster_key: key2
+          new_cluster: ${var.cluster2}
+        - job_cluster_key: key3
+          new_cluster: ${var.cluster3}
+        - job_cluster_key: key4
+          new_cluster: ${var.cluster4}
 variables:
-  cluster:
+  cluster1:
+    type: complex
+    description: "A cluster definition"
+  cluster2:
+    type: complex
+    description: "A cluster definition"
+  cluster3:
+    type: complex
+    description: "A cluster definition"
+  cluster4:
     type: complex
     description: "A cluster definition"
 
 include:
   - ./variables/*.yml
+
+
+targets:
+  default:
+  dev:
+    variables:
+      cluster3:
+        spark_version: "14.2.x-scala2.11"
+        node_type_id: "Standard_DS3_v2"
+        num_workers: 4
+        spark_conf:
+          spark.speculation: false
+          spark.databricks.delta.retentionDurationCheck.enabled: false
+      cluster4:
+        default:
+          spark_version: "14.2.x-scala2.11"
+          node_type_id: "Standard_DS3_v2"
+          num_workers: 4
+          spark_conf:
+            spark.speculation: false
+            spark.databricks.delta.retentionDurationCheck.enabled: false

diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
index badd4516..0186c437 100644
--- a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
+++ b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml
@@ -2,10 +2,18 @@ targets:
   default:
   dev:
     variables:
-      cluster:
+      cluster1:
         spark_version: "14.2.x-scala2.11"
         node_type_id: "Standard_DS3_v2"
         num_workers: 4
         spark_conf:
           spark.speculation: false
           spark.databricks.delta.retentionDurationCheck.enabled: false
+      cluster2:
+        default:
+          spark_version: "14.2.x-scala2.11"
+          node_type_id: "Standard_DS3_v2"
+          num_workers: 4
+          spark_conf:
+            spark.speculation: false
+            spark.databricks.delta.retentionDurationCheck.enabled: false

From f71d9e76499f9296daad620f6efed651b9864679 Mon Sep 17 00:00:00 2001
From: Andrew Nester
Date: Thu, 5 Sep 2024 10:56:52 +0200
Subject: [PATCH 2/3] [Release] Release v0.228.0 (#1752)

CLI:
 * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)).

Bundles:

As of this release, the CLI will show a prompt if there are configuration
changes which will lead to a DLT recreation. Users can skip the prompt by
specifying the `--auto-approve` flag.

 * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)).
 * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)).
 * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)).
 * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)).
 * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)).
 * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)).
 * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)).
 * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)).
 * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)).

Internal:
 * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)).
 * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)).
 * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)).
 * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)).

Dependency updates:
 * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)).

---------

Co-authored-by: Pieter Noordhuis
---
 CHANGELOG.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fac7d597..d6383125 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Version changelog
 
+## [Release] Release v0.228.0
+
+CLI:
+ * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)).
+
+Bundles:
+
+As of this release, the CLI will show a prompt if there are configuration changes that lead to DLT pipeline recreation.
+Users can skip the prompt by specifying the `--auto-approve` flag.
+
+ * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)).
+ * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)).
+ * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)).
+ * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)).
+ * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)).
+ * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)).
+ * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)).
+ * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)).
+ * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)).
+
+Internal:
+ * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)).
+ * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)).
+ * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)).
+ * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)).
+
+Dependency updates:
+ * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)).
+
 ## [Release] Release v0.227.1
 
 CLI:

From ceefa80d7278eeef9cce500b2c69453da2040f40 Mon Sep 17 00:00:00 2001
From: Pieter Noordhuis
Date: Thu, 5 Sep 2024 13:05:16 +0200
Subject: [PATCH 3/3] Pass copy of `dyn.Path` to callback function (#1747)

## Changes

Some call sites hold on to the `dyn.Path` provided to them by the callback.
It must therefore never be mutated after the callback returns, or these
mutations leak out into unknown scope.

This change means it is no longer possible for this failure mode to happen.

## Tests

Unit test.
---
 bundle/artifacts/expand_globs.go             |  7 +---
 .../config/validate/unique_resource_keys.go  |  6 +---
 bundle/libraries/expand_glob_references.go   |  8 ++---
 bundle/libraries/upload.go                   |  2 +-
 libs/dyn/visit.go                            |  2 +-
 libs/dyn/visit_map.go                        |  4 +--
 libs/dyn/visit_test.go                       | 36 +++++++++++++++++++
 7 files changed, 45 insertions(+), 20 deletions(-)
 create mode 100644 libs/dyn/visit_test.go

diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go
index 61744405..cdf3d459 100644
--- a/bundle/artifacts/expand_globs.go
+++ b/bundle/artifacts/expand_globs.go
@@ -33,12 +33,7 @@ func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic {
 		Severity:  diag.Error,
 		Summary:   fmt.Sprintf("%s: %s", source, message),
 		Locations: []dyn.Location{v.Location()},
-
-		Paths: []dyn.Path{
-			// Hack to clone the path. This path copy is mutable.
-			// To be addressed in a later PR.
-			p.Append(),
-		},
+		Paths:     []dyn.Path{p},
 	}
 }

diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go
index d6212b0a..50295375 100644
--- a/bundle/config/validate/unique_resource_keys.go
+++ b/bundle/config/validate/unique_resource_keys.go
@@ -3,7 +3,6 @@ package validate
 import (
 	"context"
 	"fmt"
-	"slices"
 	"sort"
 
 	"github.com/databricks/cli/bundle"
@@ -66,10 +65,7 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.D
 			}
 		}
 
-		// dyn.Path under the hood is a slice. The code that walks the configuration
-		// tree uses the same underlying slice to track the path as it walks
-		// the tree. So, we need to clone it here.
-		m.paths = append(m.paths, slices.Clone(p))
+		m.paths = append(m.paths, p)
 		m.locations = append(m.locations, v.Locations()...)
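+		// Cloning is no longer needed here: visit() now hands each callback
+		// its own copy of the path (see the libs/dyn/visit.go change below).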
 
 		resourceMetadata[k] = m

diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go
index 9e90a2a1..9322a06b 100644
--- a/bundle/libraries/expand_glob_references.go
+++ b/bundle/libraries/expand_glob_references.go
@@ -16,12 +16,10 @@ type expand struct {
 
 func matchError(p dyn.Path, l []dyn.Location, message string) diag.Diagnostic {
 	return diag.Diagnostic{
-		Severity: diag.Error,
-		Summary:  message,
-		Paths: []dyn.Path{
-			p.Append(),
-		},
+		Severity:  diag.Error,
+		Summary:   message,
 		Locations: l,
+		Paths:     []dyn.Path{p},
 	}
 }

diff --git a/bundle/libraries/upload.go b/bundle/libraries/upload.go
index be7cc41d..224e7ab2 100644
--- a/bundle/libraries/upload.go
+++ b/bundle/libraries/upload.go
@@ -76,7 +76,7 @@ func collectLocalLibraries(b *bundle.Bundle) (map[string][]configLocation, error
 			source = filepath.Join(b.RootPath, source)
 
 			libs[source] = append(libs[source], configLocation{
-				configPath: p.Append(), // Hack to get the copy of path
+				configPath: p,
 				location:   v.Location(),
 			})

diff --git a/libs/dyn/visit.go b/libs/dyn/visit.go
index 4d3cf501..38adec24 100644
--- a/libs/dyn/visit.go
+++ b/libs/dyn/visit.go
@@ -70,7 +70,7 @@ type visitOptions struct {
 
 func visit(v Value, prefix Path, suffix Pattern, opts visitOptions) (Value, error) {
 	if len(suffix) == 0 {
-		return opts.fn(prefix, v)
+		return opts.fn(slices.Clone(prefix), v)
 	}
 
 	// Initialize prefix if it is empty.

diff --git a/libs/dyn/visit_map.go b/libs/dyn/visit_map.go
index cd2cd483..3f0cded0 100644
--- a/libs/dyn/visit_map.go
+++ b/libs/dyn/visit_map.go
@@ -21,7 +21,7 @@ func Foreach(fn MapFunc) MapFunc {
 		for _, pair := range m.Pairs() {
 			pk := pair.Key
 			pv := pair.Value
-			nv, err := fn(append(p, Key(pk.MustString())), pv)
+			nv, err := fn(p.Append(Key(pk.MustString())), pv)
 			if err != nil {
 				return InvalidValue, err
 			}
@@ -32,7 +32,7 @@ func Foreach(fn MapFunc) MapFunc {
 		s := slices.Clone(v.MustSequence())
 		for i, value := range s {
 			var err error
-			s[i], err = fn(append(p, Index(i)), value)
+			s[i], err = fn(p.Append(Index(i)), value)
 			if err != nil {
 				return InvalidValue, err
 			}

diff --git a/libs/dyn/visit_test.go b/libs/dyn/visit_test.go
new file mode 100644
index 00000000..5b61399b
--- /dev/null
+++ b/libs/dyn/visit_test.go
@@ -0,0 +1,36 @@
+package dyn_test
+
+import (
+	"testing"
+
+	"github.com/databricks/cli/libs/dyn"
+	assert "github.com/databricks/cli/libs/dyn/dynassert"
+)
+
+func TestVisitCallbackPathCopy(t *testing.T) {
+	vin := dyn.V(map[string]dyn.Value{
+		"foo": dyn.V(42),
+		"bar": dyn.V(43),
+	})
+
+	var paths []dyn.Path
+
+	// The callback should receive a copy of the path.
+	// If the same underlying value is used, all collected paths will be the same.
+	// This test uses `MapByPattern` to collect all paths in the map.
+	// Visit itself doesn't have public functions and we exclusively use black-box testing for this package.
+	_, _ = dyn.MapByPattern(vin, dyn.NewPattern(dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
+		paths = append(paths, p)
+		return v, nil
+	})
+
+	// Verify that the paths retained their original values.
+	var strings []string
+	for _, p := range paths {
+		strings = append(strings, p.String())
+	}
+	assert.ElementsMatch(t, strings, []string{
+		"foo",
+		"bar",
+	})
+}
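For readers unfamiliar with the failure mode patch 3 fixes: Go's `append` may reuse the caller's backing array, so two callback invocations can end up sharing, and silently overwriting, the same path storage. A minimal self-contained sketch, using a hypothetical `Path` type in place of `dyn.Path` rather than the actual `libs/dyn` implementation:

```go
package main

import (
	"fmt"
	"slices"
)

// Path is a hypothetical stand-in for dyn.Path, which is likewise a slice;
// that is what makes the aliasing below possible.
type Path []string

// walk passes paths to the callback via a bare append, the way the tree
// walker did before this patch.
func walk(prefix Path, fn func(p Path)) {
	for _, k := range []string{"foo", "bar"} {
		// append reuses prefix's backing array when capacity allows, so
		// every invocation of fn can observe the same underlying storage.
		fn(append(prefix, k))
	}
}

// walkFixed clones the prefix first, mirroring the slices.Clone call this
// patch adds to visit().
func walkFixed(prefix Path, fn func(p Path)) {
	for _, k := range []string{"foo", "bar"} {
		fn(append(slices.Clone(prefix), k))
	}
}

func main() {
	prefix := make(Path, 0, 4) // spare capacity forces in-place appends

	var retained []Path
	walk(prefix, func(p Path) {
		retained = append(retained, p) // callback holds on to the path
	})
	// The second append overwrote the element the first callback still
	// references; both retained paths now read "bar".
	fmt.Println(retained) // [[bar] [bar]]

	retained = nil
	walkFixed(prefix, func(p Path) {
		retained = append(retained, p)
	})
	fmt.Println(retained) // [[foo] [bar]]
}
```

Cloning at the single choke point in `visit` gives every callback its own copy, which is why the per-call-site workarounds (`p.Append()`, `slices.Clone(p)`) can be deleted in this patch.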