From 72030844c52f9499b10e7158544a80fde56451f3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 4 Sep 2024 19:16:40 +0200 Subject: [PATCH 1/4] Fixed variable override in target with full variable syntax (#1749) ## Changes This PR makes sure that both of this override syntax for variables work correctly ``` targets: dev: variables: cluster1: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false cluster2: default: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false ``` ## Tests Added regression test --------- Co-authored-by: Pieter Noordhuis --- bundle/config/root.go | 43 ++++++++++++++----- bundle/tests/complex_variables_test.go | 11 ++--- .../complex_multiple_files/databricks.yml | 43 +++++++++++++++++-- .../variables/clusters.yml | 10 ++++- 4 files changed, 86 insertions(+), 21 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 281c5c2a3..46578769c 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -406,6 +406,30 @@ func (r *Root) MergeTargetOverrides(name string) error { return r.updateWithDynamicValue(root) } +var variableKeywords = []string{"default", "lookup"} + +// isFullVariableOverrideDef checks if the given value is a full syntax varaible override. +// A full syntax variable override is a map with only one of the following +// keys: "default", "lookup". +func isFullVariableOverrideDef(v dyn.Value) bool { + mv, ok := v.AsMap() + if !ok { + return false + } + + if mv.Len() != 1 { + return false + } + + for _, keyword := range variableKeywords { + if _, ok := mv.GetByString(keyword); ok { + return true + } + } + + return false +} + // rewriteShorthands performs lightweight rewriting of the configuration // tree where we allow users to write a shorthand and must rewrite to the full form. func rewriteShorthands(v dyn.Value) (dyn.Value, error) { @@ -433,30 +457,27 @@ func rewriteShorthands(v dyn.Value) (dyn.Value, error) { }, variable.Locations()), nil case dyn.KindMap, dyn.KindSequence: - lookup, err := dyn.Get(variable, "lookup") - // If lookup is set, we don't want to rewrite the variable and return it as is. - if err == nil && lookup.Kind() != dyn.KindInvalid { + // If it's a full variable definition, leave it as is. + if isFullVariableOverrideDef(variable) { return variable, nil } // Check if the original definition of variable has a type field. + // If it has a type field, it means the shorthand is a value of a complex type. // Type might not be found if the variable overriden in a separate file // and configuration is not merged yet. typeV, err := dyn.GetByPath(v, p.Append(dyn.Key("type"))) - if err != nil { - return dyn.NewValue(map[string]dyn.Value{ - "default": variable, - }, variable.Locations()), nil - } - - if typeV.MustString() == "complex" { + if err == nil && typeV.MustString() == "complex" { return dyn.NewValue(map[string]dyn.Value{ "type": typeV, "default": variable, }, variable.Locations()), nil } - return variable, nil + // If it's a shorthand, rewrite it to a full variable definition. + return dyn.NewValue(map[string]dyn.Value{ + "default": variable, + }, variable.Locations()), nil default: return variable, nil diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index d46d8d8c1..6371071ce 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -81,9 +81,10 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { ), )) require.NoError(t, diags.Error()) - - require.Equal(t, "14.2.x-scala2.11", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkVersion) - require.Equal(t, "Standard_DS3_v2", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NodeTypeId) - require.Equal(t, 4, b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.NumWorkers) - require.Equal(t, "false", b.Config.Resources.Jobs["my_job"].JobClusters[0].NewCluster.SparkConf["spark.speculation"]) + for _, cluster := range b.Config.Resources.Jobs["my_job"].JobClusters { + require.Equalf(t, "14.2.x-scala2.11", cluster.NewCluster.SparkVersion, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "Standard_DS3_v2", cluster.NewCluster.NodeTypeId, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, 4, cluster.NewCluster.NumWorkers, "cluster: %v", cluster.JobClusterKey) + require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) + } } diff --git a/bundle/tests/variables/complex_multiple_files/databricks.yml b/bundle/tests/variables/complex_multiple_files/databricks.yml index cb5d24395..42a82c615 100644 --- a/bundle/tests/variables/complex_multiple_files/databricks.yml +++ b/bundle/tests/variables/complex_multiple_files/databricks.yml @@ -5,13 +5,48 @@ resources: jobs: my_job: job_clusters: - - job_cluster_key: key - new_cluster: ${var.cluster} - + - job_cluster_key: key1 + new_cluster: ${var.cluster1} + - job_cluster_key: key2 + new_cluster: ${var.cluster2} + - job_cluster_key: key3 + new_cluster: ${var.cluster3} + - job_cluster_key: key4 + new_cluster: ${var.cluster4} variables: - cluster: + cluster1: + type: complex + description: "A cluster definition" + cluster2: + type: complex + description: "A cluster definition" + cluster3: + type: complex + description: "A cluster definition" + cluster4: type: complex description: "A cluster definition" include: - ./variables/*.yml + + +targets: + default: + dev: + variables: + cluster3: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false + cluster4: + default: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false diff --git a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml index badd45164..0186c437b 100644 --- a/bundle/tests/variables/complex_multiple_files/variables/clusters.yml +++ b/bundle/tests/variables/complex_multiple_files/variables/clusters.yml @@ -2,10 +2,18 @@ targets: default: dev: variables: - cluster: + cluster1: spark_version: "14.2.x-scala2.11" node_type_id: "Standard_DS3_v2" num_workers: 4 spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false + cluster2: + default: + spark_version: "14.2.x-scala2.11" + node_type_id: "Standard_DS3_v2" + num_workers: 4 + spark_conf: + spark.speculation: false + spark.databricks.delta.retentionDurationCheck.enabled: false From f71d9e76499f9296daad620f6efed651b9864679 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 5 Sep 2024 10:56:52 +0200 Subject: [PATCH 2/4] [Release] Release v0.228.0 (#1752) CLI: * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)). Bundles: As of this release CLI will show a prompt is if there are configuration changes which will lead to a DLT recreation. Users can skip the prompt by specifying the `--auto-approve` flag * Pass along $AZURE_CONFIG_FILE to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)). * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)). * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)). * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)). * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)). * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)). * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)). * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)). * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)). Internal: * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)). * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)). * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)). * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)). Dependency updates: * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)). --------- Co-authored-by: Pieter Noordhuis --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fac7d597d..d63831253 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Version changelog +## [Release] Release v0.228.0 + +CLI: + * Do not error if we cannot prompt for a profile in `auth login` ([#1745](https://github.com/databricks/cli/pull/1745)). + +Bundles: + +As of this release, the CLI will show a prompt if there are configuration changes that lead to DLT pipeline recreation. +Users can skip the prompt by specifying the `--auto-approve` flag. + + * Pass along to Terraform process ([#1734](https://github.com/databricks/cli/pull/1734)). + * Add prompt when a pipeline recreation happens ([#1672](https://github.com/databricks/cli/pull/1672)). + * Use materialized views in the default-sql template ([#1709](https://github.com/databricks/cli/pull/1709)). + * Update templates to latest LTS DBR ([#1715](https://github.com/databricks/cli/pull/1715)). + * Make lock optional in the JSON schema ([#1738](https://github.com/databricks/cli/pull/1738)). + * Do not suppress normalisation diagnostics for resolving variables ([#1740](https://github.com/databricks/cli/pull/1740)). + * Include a permissions section in all templates ([#1713](https://github.com/databricks/cli/pull/1713)). + * Fixed complex variables are not being correctly merged from include files ([#1746](https://github.com/databricks/cli/pull/1746)). + * Fixed variable override in target with full variable syntax ([#1749](https://github.com/databricks/cli/pull/1749)). + +Internal: + * Consider serverless clusters as compatible for Python wheel tasks ([#1733](https://github.com/databricks/cli/pull/1733)). + * PythonMutator: explain missing package error ([#1736](https://github.com/databricks/cli/pull/1736)). + * Add `dyn.Time` to box a timestamp with its original string value ([#1732](https://github.com/databricks/cli/pull/1732)). + * Fix streaming of stdout, stdin, stderr in cobra test runner ([#1742](https://github.com/databricks/cli/pull/1742)). + +Dependency updates: + * Bump github.com/Masterminds/semver/v3 from 3.2.1 to 3.3.0 ([#1741](https://github.com/databricks/cli/pull/1741)). + ## [Release] Release v0.227.1 CLI: From ceefa80d7278eeef9cce500b2c69453da2040f40 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 5 Sep 2024 13:05:16 +0200 Subject: [PATCH 3/4] Pass copy of `dyn.Path` to callback function (#1747) ## Changes Some call sites hold on to the `dyn.Path` provided to them by the callback. It must therefore never be mutated after the callback returns, or these mutations leak out into unknown scope. This change means it is no longer possible for this failure mode to happen. ## Tests Unit test. --- bundle/artifacts/expand_globs.go | 7 +--- .../config/validate/unique_resource_keys.go | 6 +--- bundle/libraries/expand_glob_references.go | 8 ++--- bundle/libraries/upload.go | 2 +- libs/dyn/visit.go | 2 +- libs/dyn/visit_map.go | 4 +-- libs/dyn/visit_test.go | 36 +++++++++++++++++++ 7 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 libs/dyn/visit_test.go diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index 617444054..cdf3d4590 100644 --- a/bundle/artifacts/expand_globs.go +++ b/bundle/artifacts/expand_globs.go @@ -33,12 +33,7 @@ func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { Severity: diag.Error, Summary: fmt.Sprintf("%s: %s", source, message), Locations: []dyn.Location{v.Location()}, - - Paths: []dyn.Path{ - // Hack to clone the path. This path copy is mutable. - // To be addressed in a later PR. - p.Append(), - }, + Paths: []dyn.Path{p}, } } diff --git a/bundle/config/validate/unique_resource_keys.go b/bundle/config/validate/unique_resource_keys.go index d6212b0ac..50295375b 100644 --- a/bundle/config/validate/unique_resource_keys.go +++ b/bundle/config/validate/unique_resource_keys.go @@ -3,7 +3,6 @@ package validate import ( "context" "fmt" - "slices" "sort" "github.com/databricks/cli/bundle" @@ -66,10 +65,7 @@ func (m *uniqueResourceKeys) Apply(ctx context.Context, b *bundle.Bundle) diag.D } } - // dyn.Path under the hood is a slice. The code that walks the configuration - // tree uses the same underlying slice to track the path as it walks - // the tree. So, we need to clone it here. - m.paths = append(m.paths, slices.Clone(p)) + m.paths = append(m.paths, p) m.locations = append(m.locations, v.Locations()...) resourceMetadata[k] = m diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index 9e90a2a17..9322a06b8 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -16,12 +16,10 @@ type expand struct { func matchError(p dyn.Path, l []dyn.Location, message string) diag.Diagnostic { return diag.Diagnostic{ - Severity: diag.Error, - Summary: message, - Paths: []dyn.Path{ - p.Append(), - }, + Severity: diag.Error, + Summary: message, Locations: l, + Paths: []dyn.Path{p}, } } diff --git a/bundle/libraries/upload.go b/bundle/libraries/upload.go index be7cc41db..224e7ab2d 100644 --- a/bundle/libraries/upload.go +++ b/bundle/libraries/upload.go @@ -76,7 +76,7 @@ func collectLocalLibraries(b *bundle.Bundle) (map[string][]configLocation, error source = filepath.Join(b.RootPath, source) libs[source] = append(libs[source], configLocation{ - configPath: p.Append(), // Hack to get the copy of path + configPath: p, location: v.Location(), }) diff --git a/libs/dyn/visit.go b/libs/dyn/visit.go index 4d3cf5014..38adec24f 100644 --- a/libs/dyn/visit.go +++ b/libs/dyn/visit.go @@ -70,7 +70,7 @@ type visitOptions struct { func visit(v Value, prefix Path, suffix Pattern, opts visitOptions) (Value, error) { if len(suffix) == 0 { - return opts.fn(prefix, v) + return opts.fn(slices.Clone(prefix), v) } // Initialize prefix if it is empty. diff --git a/libs/dyn/visit_map.go b/libs/dyn/visit_map.go index cd2cd4831..3f0cded03 100644 --- a/libs/dyn/visit_map.go +++ b/libs/dyn/visit_map.go @@ -21,7 +21,7 @@ func Foreach(fn MapFunc) MapFunc { for _, pair := range m.Pairs() { pk := pair.Key pv := pair.Value - nv, err := fn(append(p, Key(pk.MustString())), pv) + nv, err := fn(p.Append(Key(pk.MustString())), pv) if err != nil { return InvalidValue, err } @@ -32,7 +32,7 @@ func Foreach(fn MapFunc) MapFunc { s := slices.Clone(v.MustSequence()) for i, value := range s { var err error - s[i], err = fn(append(p, Index(i)), value) + s[i], err = fn(p.Append(Index(i)), value) if err != nil { return InvalidValue, err } diff --git a/libs/dyn/visit_test.go b/libs/dyn/visit_test.go new file mode 100644 index 000000000..5b61399be --- /dev/null +++ b/libs/dyn/visit_test.go @@ -0,0 +1,36 @@ +package dyn_test + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestVisitCallbackPathCopy(t *testing.T) { + vin := dyn.V(map[string]dyn.Value{ + "foo": dyn.V(42), + "bar": dyn.V(43), + }) + + var paths []dyn.Path + + // The callback should receive a copy of the path. + // If the same underlying value is used, all collected paths will be the same. + // This test uses `MapByPattern` to collect all paths in the map. + // Visit itself doesn't have public functions and we exclusively use black-box testing for this package. + _, _ = dyn.MapByPattern(vin, dyn.NewPattern(dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + paths = append(paths, p) + return v, nil + }) + + // Verify that the paths retained their original values. + var strings []string + for _, p := range paths { + strings = append(strings, p.String()) + } + assert.ElementsMatch(t, strings, []string{ + "foo", + "bar", + }) +} From 02e83877f4284589942bb439e849e01fc81a4314 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Fri, 6 Sep 2024 13:34:57 +0200 Subject: [PATCH 4/4] Added listing cluster filtering for cluster lookups (#1754) ## Changes We added a custom resolver for the cluster to add filtering for the cluster source when we list all clusters. Without the filtering listing could take a very long time (5-10 mins) which leads to lookup timeouts. ## Tests Existing unit tests passing --- .codegen/lookup.go.tmpl | 4 ++ .../resolve_resource_references_test.go | 33 +++++++++----- bundle/config/variable/lookup.go | 44 +++++++++++++++++++ bundle/config/variable/lookup_overrides.go | 41 +++++++++++++++++ bundle/tests/variables_test.go | 9 +++- 5 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 bundle/config/variable/lookup_overrides.go diff --git a/.codegen/lookup.go.tmpl b/.codegen/lookup.go.tmpl index 431709f90..124b629d0 100644 --- a/.codegen/lookup.go.tmpl +++ b/.codegen/lookup.go.tmpl @@ -116,6 +116,10 @@ func allResolvers() *resolvers { {{range .Services -}} {{- if in $allowlist .KebabName -}} r.{{.Singular.PascalName}} = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["{{.Singular.PascalName}}"] + if ok { + return fn(ctx, w, name) + } entity, err := w.{{.PascalName}}.GetBy{{range .NamedIdMap.NamePath}}{{.PascalName}}{{end}}(ctx, name) if err != nil { return "", err diff --git a/bundle/config/mutator/resolve_resource_references_test.go b/bundle/config/mutator/resolve_resource_references_test.go index 86a03b23e..ee2f0e2ea 100644 --- a/bundle/config/mutator/resolve_resource_references_test.go +++ b/bundle/config/mutator/resolve_resource_references_test.go @@ -2,7 +2,6 @@ package mutator import ( "context" - "fmt" "testing" "github.com/databricks/cli/bundle" @@ -44,11 +43,13 @@ func TestResolveClusterReference(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) clusterApi := m.GetMockClustersAPI() - clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef1).Return(&compute.ClusterDetails{ - ClusterId: "1234-5678-abcd", - }, nil) - clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef2).Return(&compute.ClusterDetails{ - ClusterId: "9876-5432-xywz", + clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "1234-5678-abcd", ClusterName: clusterRef1}, + {ClusterId: "9876-5432-xywz", ClusterName: clusterRef2}, }, nil) diags := bundle.Apply(context.Background(), b, ResolveResourceReferences()) @@ -78,10 +79,16 @@ func TestResolveNonExistentClusterReference(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) clusterApi := m.GetMockClustersAPI() - clusterApi.EXPECT().GetByClusterName(mock.Anything, clusterRef).Return(nil, fmt.Errorf("ClusterDetails named '%s' does not exist", clusterRef)) + clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "1234-5678-abcd", ClusterName: "some other cluster"}, + }, nil) diags := bundle.Apply(context.Background(), b, ResolveResourceReferences()) - require.ErrorContains(t, diags.Error(), "failed to resolve cluster: Random, err: ClusterDetails named 'Random' does not exist") + require.ErrorContains(t, diags.Error(), "failed to resolve cluster: Random, err: cluster named 'Random' does not exist") } func TestNoLookupIfVariableIsSet(t *testing.T) { @@ -158,8 +165,14 @@ func TestResolveVariableReferencesInVariableLookups(t *testing.T) { m := mocks.NewMockWorkspaceClient(t) b.SetWorkpaceClient(m.WorkspaceClient) clusterApi := m.GetMockClustersAPI() - clusterApi.EXPECT().GetByClusterName(mock.Anything, "cluster-bar-dev").Return(&compute.ClusterDetails{ - ClusterId: "1234-5678-abcd", + + clusterApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "1234-5678-abcd", ClusterName: "cluster-bar-dev"}, + {ClusterId: "9876-5432-xywz", ClusterName: "some other cluster"}, }, nil) diags := bundle.Apply(context.Background(), b, bundle.Seq(ResolveVariableReferencesInLookup(), ResolveResourceReferences())) diff --git a/bundle/config/variable/lookup.go b/bundle/config/variable/lookup.go index 9c85e2a71..e40b0ef7a 100755 --- a/bundle/config/variable/lookup.go +++ b/bundle/config/variable/lookup.go @@ -220,6 +220,10 @@ type resolvers struct { func allResolvers() *resolvers { r := &resolvers{} r.Alert = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Alert"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Alerts.GetByDisplayName(ctx, name) if err != nil { return "", err @@ -228,6 +232,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.Id), nil } r.ClusterPolicy = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["ClusterPolicy"] + if ok { + return fn(ctx, w, name) + } entity, err := w.ClusterPolicies.GetByName(ctx, name) if err != nil { return "", err @@ -236,6 +244,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.PolicyId), nil } r.Cluster = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Cluster"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Clusters.GetByClusterName(ctx, name) if err != nil { return "", err @@ -244,6 +256,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.ClusterId), nil } r.Dashboard = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Dashboard"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Dashboards.GetByName(ctx, name) if err != nil { return "", err @@ -252,6 +268,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.Id), nil } r.InstancePool = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["InstancePool"] + if ok { + return fn(ctx, w, name) + } entity, err := w.InstancePools.GetByInstancePoolName(ctx, name) if err != nil { return "", err @@ -260,6 +280,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.InstancePoolId), nil } r.Job = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Job"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Jobs.GetBySettingsName(ctx, name) if err != nil { return "", err @@ -268,6 +292,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.JobId), nil } r.Metastore = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Metastore"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Metastores.GetByName(ctx, name) if err != nil { return "", err @@ -276,6 +304,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.MetastoreId), nil } r.Pipeline = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Pipeline"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Pipelines.GetByName(ctx, name) if err != nil { return "", err @@ -284,6 +316,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.PipelineId), nil } r.Query = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Query"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Queries.GetByDisplayName(ctx, name) if err != nil { return "", err @@ -292,6 +328,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.Id), nil } r.ServicePrincipal = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["ServicePrincipal"] + if ok { + return fn(ctx, w, name) + } entity, err := w.ServicePrincipals.GetByDisplayName(ctx, name) if err != nil { return "", err @@ -300,6 +340,10 @@ func allResolvers() *resolvers { return fmt.Sprint(entity.ApplicationId), nil } r.Warehouse = func(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + fn, ok := lookupOverrides["Warehouse"] + if ok { + return fn(ctx, w, name) + } entity, err := w.Warehouses.GetByName(ctx, name) if err != nil { return "", err diff --git a/bundle/config/variable/lookup_overrides.go b/bundle/config/variable/lookup_overrides.go new file mode 100644 index 000000000..1be373dc6 --- /dev/null +++ b/bundle/config/variable/lookup_overrides.go @@ -0,0 +1,41 @@ +package variable + +import ( + "context" + "fmt" + + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +var lookupOverrides = map[string]resolverFunc{ + "Cluster": resolveCluster, +} + +// We added a custom resolver for the cluster to add filtering for the cluster source when we list all clusters. +// Without the filtering listing could take a very long time (5-10 mins) which leads to lookup timeouts. +func resolveCluster(ctx context.Context, w *databricks.WorkspaceClient, name string) (string, error) { + result, err := w.Clusters.ListAll(ctx, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }) + + if err != nil { + return "", err + } + + tmp := map[string][]compute.ClusterDetails{} + for _, v := range result { + key := v.ClusterName + tmp[key] = append(tmp[key], v) + } + alternatives, ok := tmp[name] + if !ok || len(alternatives) == 0 { + return "", fmt.Errorf("cluster named '%s' does not exist", name) + } + if len(alternatives) > 1 { + return "", fmt.Errorf("there are %d instances of clusters named '%s'", len(alternatives), name) + } + return alternatives[0].ClusterId, nil +} diff --git a/bundle/tests/variables_test.go b/bundle/tests/variables_test.go index 51a23e5d5..9451c5a04 100644 --- a/bundle/tests/variables_test.go +++ b/bundle/tests/variables_test.go @@ -124,8 +124,13 @@ func TestVariablesWithTargetLookupOverrides(t *testing.T) { }, nil) clustersApi := mockWorkspaceClient.GetMockClustersAPI() - clustersApi.EXPECT().GetByClusterName(mock.Anything, "some-test-cluster").Return(&compute.ClusterDetails{ - ClusterId: "4321", + clustersApi.EXPECT().ListAll(mock.Anything, compute.ListClustersRequest{ + FilterBy: &compute.ListClustersFilterBy{ + ClusterSources: []compute.ClusterSource{compute.ClusterSourceApi, compute.ClusterSourceUi}, + }, + }).Return([]compute.ClusterDetails{ + {ClusterId: "4321", ClusterName: "some-test-cluster"}, + {ClusterId: "9876", ClusterName: "some-other-cluster"}, }, nil) clusterPoliciesApi := mockWorkspaceClient.GetMockClusterPoliciesAPI()