From e220f9ddd6745a9b3ae10b2dff2b16a4b72d1626 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 16 Sep 2024 20:35:07 +0200 Subject: [PATCH 01/21] Use the friendly name of service principals when shortening their name (#1770) ## Summary Use the friendly name of service principals when shortening their name. This change is helpful for the prefix in development mode. Instead of adding a prefix like `[dev 1706906c-c0a2-4c25-9f57-3a7aa3cb8123]`, we'll prefix like `[dev my_principal]`. --- .../config/mutator/populate_current_user.go | 2 +- internal/init_test.go | 2 +- libs/auth/user.go | 9 ++- libs/auth/user_test.go | 71 +++++++++++++++---- libs/template/helpers.go | 2 +- 5 files changed, 66 insertions(+), 20 deletions(-) diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index b5e0bd437..1e99b327c 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -33,7 +33,7 @@ func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) diag. } b.Config.Workspace.CurrentUser = &config.User{ - ShortName: auth.GetShortUserName(me.UserName), + ShortName: auth.GetShortUserName(me), User: me, } diff --git a/internal/init_test.go b/internal/init_test.go index c3cb0127e..d1a89f7b7 100644 --- a/internal/init_test.go +++ b/internal/init_test.go @@ -126,7 +126,7 @@ func TestAccBundleInitHelpers(t *testing.T) { }{ { funcName: "{{short_name}}", - expected: auth.GetShortUserName(me.UserName), + expected: auth.GetShortUserName(me), }, { funcName: "{{user_name}}", diff --git a/libs/auth/user.go b/libs/auth/user.go index 8eaa87633..c6aa974f3 100644 --- a/libs/auth/user.go +++ b/libs/auth/user.go @@ -4,12 +4,17 @@ import ( "strings" "github.com/databricks/cli/libs/textutil" + "github.com/databricks/databricks-sdk-go/service/iam" ) // Get a short-form username, based on the user's primary email address. // We leave the full range of unicode letters in tact, but remove all "special" characters, // including dots, which are not supported in e.g. experiment names. -func GetShortUserName(emailAddress string) string { - local, _, _ := strings.Cut(emailAddress, "@") +func GetShortUserName(user *iam.User) string { + name := user.UserName + if IsServicePrincipal(user.UserName) && user.DisplayName != "" { + name = user.DisplayName + } + local, _, _ := strings.Cut(name, "@") return textutil.NormalizeString(local) } diff --git a/libs/auth/user_test.go b/libs/auth/user_test.go index 62b2d29ac..24b61464b 100644 --- a/libs/auth/user_test.go +++ b/libs/auth/user_test.go @@ -3,70 +3,111 @@ package auth import ( "testing" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" ) func TestGetShortUserName(t *testing.T) { tests := []struct { name string - email string + user *iam.User expected string }{ { - email: "test.user.1234@example.com", + user: &iam.User{ + UserName: "test.user.1234@example.com", + }, expected: "test_user_1234", }, { - email: "tést.üser@example.com", + user: &iam.User{ + UserName: "tést.üser@example.com", + }, expected: "tést_üser", }, { - email: "test$.user@example.com", + user: &iam.User{ + UserName: "test$.user@example.com", + }, expected: "test_user", }, { - email: `jöhn.dœ@domain.com`, // Using non-ASCII characters. + user: &iam.User{ + UserName: `jöhn.dœ@domain.com`, // Using non-ASCII characters. 
+ }, expected: "jöhn_dœ", }, { - email: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + user: &iam.User{ + UserName: `first+tag@email.com`, // The plus (+) sign is used for "sub-addressing" in some email services. + }, expected: "first_tag", }, { - email: `email@sub.domain.com`, // Using a sub-domain. + user: &iam.User{ + UserName: `email@sub.domain.com`, // Using a sub-domain. + }, expected: "email", }, { - email: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. + user: &iam.User{ + UserName: `"_quoted"@domain.com`, // Quoted strings can be part of the local-part. + }, expected: "quoted", }, { - email: `name-o'mally@website.org`, // Single quote in the local-part. + user: &iam.User{ + UserName: `name-o'mally@website.org`, // Single quote in the local-part. + }, expected: "name_o_mally", }, { - email: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + user: &iam.User{ + UserName: `user%domain@external.com`, // Percent sign can be used for email routing in legacy systems. + }, expected: "user_domain", }, { - email: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + user: &iam.User{ + UserName: `long.name.with.dots@domain.net`, // Multiple dots in the local-part. + }, expected: "long_name_with_dots", }, { - email: `me&you@together.com`, // Using an ampersand (&) in the local-part. + user: &iam.User{ + UserName: `me&you@together.com`, // Using an ampersand (&) in the local-part. + }, expected: "me_you", }, { - email: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + user: &iam.User{ + UserName: `user!def!xyz@domain.org`, // The exclamation mark can be valid in some legacy systems. + }, expected: "user_def_xyz", }, { - email: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). + user: &iam.User{ + UserName: `admin@ιντερνετ.com`, // Domain in non-ASCII characters (IDN or Internationalized Domain Name). + }, expected: "admin", }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + DisplayName: "my-service-principal", + }, + expected: "my_service_principal", + }, + { + user: &iam.User{ + UserName: `1706906c-c0a2-4c25-9f57-3a7aa3cb8123`, + // This service princpal has DisplayName (it's an optional property) + }, + expected: "1706906c_c0a2_4c25_9f57_3a7aa3cb8123", + }, } for _, tt := range tests { - assert.Equal(t, tt.expected, GetShortUserName(tt.email)) + assert.Equal(t, tt.expected, GetShortUserName(tt.user)) } } diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 1dfe74d73..88c73cc47 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -119,7 +119,7 @@ func loadHelpers(ctx context.Context) template.FuncMap { return "", err } } - return auth.GetShortUserName(cachedUser.UserName), nil + return auth.GetShortUserName(cachedUser), nil }, // Get the default workspace catalog. If there is no default, or if // Unity Catalog is not enabled, return an empty string. 
From bcab6ca37b27c71156cdb3a9119db9becef4f869 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 18 Sep 2024 12:23:07 +0200 Subject: [PATCH 02/21] Fixed detecting full syntax variable override which includes type field (#1775) ## Changes Fixes #1773 ## Tests Confirmed manually --- bundle/config/root.go | 21 ++++++++++++++++--- bundle/tests/complex_variables_test.go | 18 ++++++++++++++++ bundle/tests/variables/complex/databricks.yml | 13 ++++++++++++ 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 46578769c..884c2e1ca 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -409,18 +409,33 @@ func (r *Root) MergeTargetOverrides(name string) error { var variableKeywords = []string{"default", "lookup"} // isFullVariableOverrideDef checks if the given value is a full syntax varaible override. -// A full syntax variable override is a map with only one of the following -// keys: "default", "lookup". +// A full syntax variable override is a map with either 1 of 2 keys. +// If it's 2 keys, the keys should be "default" and "type". +// If it's 1 key, the key should be one of the following keys: "default", "lookup". func isFullVariableOverrideDef(v dyn.Value) bool { mv, ok := v.AsMap() if !ok { return false } - if mv.Len() != 1 { + // If the map has more than 2 keys, it is not a full variable override. + if mv.Len() > 2 { return false } + // If the map has 2 keys, one of them should be "default" and the other is "type" + if mv.Len() == 2 { + if _, ok := mv.GetByString("type"); !ok { + return false + } + + if _, ok := mv.GetByString("default"); !ok { + return false + } + + return true + } + for _, keyword := range variableKeywords { if _, ok := mv.GetByString(keyword); ok { return true diff --git a/bundle/tests/complex_variables_test.go b/bundle/tests/complex_variables_test.go index 6371071ce..7a9a53a76 100644 --- a/bundle/tests/complex_variables_test.go +++ b/bundle/tests/complex_variables_test.go @@ -88,3 +88,21 @@ func TestComplexVariablesOverrideWithMultipleFiles(t *testing.T) { require.Equalf(t, "false", cluster.NewCluster.SparkConf["spark.speculation"], "cluster: %v", cluster.JobClusterKey) } } + +func TestComplexVariablesOverrideWithFullSyntax(t *testing.T) { + b, diags := loadTargetWithDiags("variables/complex", "dev") + require.Empty(t, diags) + + diags = bundle.Apply(context.Background(), b, bundle.Seq( + mutator.SetVariables(), + mutator.ResolveVariableReferencesInComplexVariables(), + mutator.ResolveVariableReferences( + "variables", + ), + )) + require.NoError(t, diags.Error()) + require.Empty(t, diags) + + complexvar := b.Config.Variables["complexvar"].Value + require.Equal(t, map[string]interface{}{"key1": "1", "key2": "2", "key3": "3"}, complexvar) +} diff --git a/bundle/tests/variables/complex/databricks.yml b/bundle/tests/variables/complex/databricks.yml index ca27f606d..3b32a7c8e 100644 --- a/bundle/tests/variables/complex/databricks.yml +++ b/bundle/tests/variables/complex/databricks.yml @@ -35,6 +35,13 @@ variables: - jar: "/path/to/jar" - egg: "/path/to/egg" - whl: "/path/to/whl" + complexvar: + type: complex + description: "A complex variable" + default: + key1: "value1" + key2: "value2" + key3: "value3" targets: @@ -49,3 +56,9 @@ targets: spark_conf: spark.speculation: false spark.databricks.delta.retentionDurationCheck.enabled: false + complexvar: + type: complex + default: + key1: "1" + key2: "2" + key3: "3" From e2c1d51d8437963bec84c857b74bb210b78b26b0 Mon Sep 17 00:00:00 2001 From: Andrew 
Nester Date: Wed, 18 Sep 2024 13:26:16 +0200 Subject: [PATCH 03/21] [Release] Release v0.228.1 (#1778) Bundles: * Added listing cluster filtering for cluster lookups ([#1754](https://github.com/databricks/cli/pull/1754)). * Expand library globs relative to the sync root ([#1756](https://github.com/databricks/cli/pull/1756)). * Fixed generated YAML missing 'default' for empty values ([#1765](https://github.com/databricks/cli/pull/1765)). * Use periodic triggers in all templates ([#1739](https://github.com/databricks/cli/pull/1739)). * Use the friendly name of service principals when shortening their name ([#1770](https://github.com/databricks/cli/pull/1770)). * Fixed detecting full syntax variable override which includes type field ([#1775](https://github.com/databricks/cli/pull/1775)). Internal: * Pass copy of `dyn.Path` to callback function ([#1747](https://github.com/databricks/cli/pull/1747)). * Make bundle JSON schema modular with `$defs` ([#1700](https://github.com/databricks/cli/pull/1700)). * Alias variables block in the `Target` struct ([#1748](https://github.com/databricks/cli/pull/1748)). * Add end to end integration tests for bundle JSON schema ([#1726](https://github.com/databricks/cli/pull/1726)). * Fix artifact upload integration tests ([#1767](https://github.com/databricks/cli/pull/1767)). API Changes: * Added `databricks quality-monitors regenerate-dashboard` command. OpenAPI commit d05898328669a3f8ab0c2ecee37db2673d3ea3f7 (2024-09-04) Dependency updates: * Bump golang.org/x/term from 0.23.0 to 0.24.0 ([#1757](https://github.com/databricks/cli/pull/1757)). * Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 ([#1761](https://github.com/databricks/cli/pull/1761)). * Bump golang.org/x/text from 0.17.0 to 0.18.0 ([#1759](https://github.com/databricks/cli/pull/1759)). * Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 ([#1760](https://github.com/databricks/cli/pull/1760)). --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d63831253..32a7e5cfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Version changelog +## [Release] Release v0.228.1 + +Bundles: + * Added listing cluster filtering for cluster lookups ([#1754](https://github.com/databricks/cli/pull/1754)). + * Expand library globs relative to the sync root ([#1756](https://github.com/databricks/cli/pull/1756)). + * Fixed generated YAML missing 'default' for empty values ([#1765](https://github.com/databricks/cli/pull/1765)). + * Use periodic triggers in all templates ([#1739](https://github.com/databricks/cli/pull/1739)). + * Use the friendly name of service principals when shortening their name ([#1770](https://github.com/databricks/cli/pull/1770)). + * Fixed detecting full syntax variable override which includes type field ([#1775](https://github.com/databricks/cli/pull/1775)). + +Internal: + * Pass copy of `dyn.Path` to callback function ([#1747](https://github.com/databricks/cli/pull/1747)). + * Make bundle JSON schema modular with `` ([#1700](https://github.com/databricks/cli/pull/1700)). + * Alias variables block in the `Target` struct ([#1748](https://github.com/databricks/cli/pull/1748)). + * Add end to end integration tests for bundle JSON schema ([#1726](https://github.com/databricks/cli/pull/1726)). + * Fix artifact upload integration tests ([#1767](https://github.com/databricks/cli/pull/1767)). + +API Changes: + * Added `databricks quality-monitors regenerate-dashboard` command. 
+ +OpenAPI commit d05898328669a3f8ab0c2ecee37db2673d3ea3f7 (2024-09-04) +Dependency updates: + * Bump golang.org/x/term from 0.23.0 to 0.24.0 ([#1757](https://github.com/databricks/cli/pull/1757)). + * Bump golang.org/x/oauth2 from 0.22.0 to 0.23.0 ([#1761](https://github.com/databricks/cli/pull/1761)). + * Bump golang.org/x/text from 0.17.0 to 0.18.0 ([#1759](https://github.com/databricks/cli/pull/1759)). + * Bump github.com/databricks/databricks-sdk-go from 0.45.0 to 0.46.0 ([#1760](https://github.com/databricks/cli/pull/1760)). + ## [Release] Release v0.228.0 CLI: From cf989a7e10e56f0b021eb4ffc5a7b793da25b540 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 19 Sep 2024 13:21:32 +0200 Subject: [PATCH 04/21] Upgrade to TF provider 1.52 (#1781) ## Changes Upgrade to TF provider 1.52 We also temporarily skip generating plugin framework structs to unblock upgrade as generation does not work yet and need to be fixed separately --- .../tf/codegen/generator/generator.go | 14 +++++++++++++- bundle/internal/tf/codegen/schema/version.go | 2 +- .../tf/schema/data_source_clusters.go | 16 ++++++++++++---- .../schema/data_source_external_location.go | 1 + .../internal/tf/schema/data_source_share.go | 2 ++ ...omatic_cluster_update_workspace_setting.go | 18 ++++++------------ bundle/internal/tf/schema/resource_cluster.go | 1 + ...ance_security_profile_workspace_setting.go | 4 ++-- ...d_security_monitoring_workspace_setting.go | 2 +- .../tf/schema/resource_model_serving.go | 18 ++++++++++-------- bundle/internal/tf/schema/resource_share.go | 19 +++++++++++++------ .../internal/tf/schema/resource_sql_table.go | 1 + bundle/internal/tf/schema/root.go | 2 +- 13 files changed, 64 insertions(+), 36 deletions(-) diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index 86d762439..b31fdf153 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -51,9 +51,15 @@ func (r *root) Generate(path string) error { } func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { - // Generate types for resources. + // Generate types for resources var resources []*namedBlock for _, k := range sortKeys(schema.ResourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. + if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.ResourceSchemas[k] b := &namedBlock{ filePattern: "resource_%s.go", @@ -71,6 +77,12 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error // Generate types for data sources. var dataSources []*namedBlock for _, k := range sortKeys(schema.DataSourceSchemas) { + // Skipping all plugin framework struct generation. + // TODO: This is a temporary fix, generation should be fixed in the future. 
+ if strings.HasSuffix(k, "_pluginframework") { + continue + } + v := schema.DataSourceSchemas[k] b := &namedBlock{ filePattern: "data_source_%s.go", diff --git a/bundle/internal/tf/codegen/schema/version.go b/bundle/internal/tf/codegen/schema/version.go index efb297243..b71ea7d1c 100644 --- a/bundle/internal/tf/codegen/schema/version.go +++ b/bundle/internal/tf/codegen/schema/version.go @@ -1,3 +1,3 @@ package schema -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" diff --git a/bundle/internal/tf/schema/data_source_clusters.go b/bundle/internal/tf/schema/data_source_clusters.go index 7a5f3053d..8c5f9578e 100644 --- a/bundle/internal/tf/schema/data_source_clusters.go +++ b/bundle/internal/tf/schema/data_source_clusters.go @@ -2,8 +2,16 @@ package schema -type DataSourceClusters struct { - ClusterNameContains string `json:"cluster_name_contains,omitempty"` - Id string `json:"id,omitempty"` - Ids []string `json:"ids,omitempty"` +type DataSourceClustersFilterBy struct { + ClusterSources []string `json:"cluster_sources,omitempty"` + ClusterStates []string `json:"cluster_states,omitempty"` + IsPinned bool `json:"is_pinned,omitempty"` + PolicyId string `json:"policy_id,omitempty"` +} + +type DataSourceClusters struct { + ClusterNameContains string `json:"cluster_name_contains,omitempty"` + Id string `json:"id,omitempty"` + Ids []string `json:"ids,omitempty"` + FilterBy *DataSourceClustersFilterBy `json:"filter_by,omitempty"` } diff --git a/bundle/internal/tf/schema/data_source_external_location.go b/bundle/internal/tf/schema/data_source_external_location.go index a3e78cbd3..e1ad9dc3d 100644 --- a/bundle/internal/tf/schema/data_source_external_location.go +++ b/bundle/internal/tf/schema/data_source_external_location.go @@ -19,6 +19,7 @@ type DataSourceExternalLocationExternalLocationInfo struct { CreatedBy string `json:"created_by,omitempty"` CredentialId string `json:"credential_id,omitempty"` CredentialName string `json:"credential_name,omitempty"` + Fallback bool `json:"fallback,omitempty"` IsolationMode string `json:"isolation_mode,omitempty"` MetastoreId string `json:"metastore_id,omitempty"` Name string `json:"name,omitempty"` diff --git a/bundle/internal/tf/schema/data_source_share.go b/bundle/internal/tf/schema/data_source_share.go index 3b40fbb51..da9afaaef 100644 --- a/bundle/internal/tf/schema/data_source_share.go +++ b/bundle/internal/tf/schema/data_source_share.go @@ -18,12 +18,14 @@ type DataSourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition []DataSourceShareObjectPartition `json:"partition,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go index e95639de8..5d7f6a140 100644 --- a/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_automatic_cluster_update_workspace_setting.go @@ -2,20 +2,14 @@ package schema -type 
ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails struct { - ForcedForComplianceMode bool `json:"forced_for_compliance_mode,omitempty"` - UnavailableForDisabledEntitlement bool `json:"unavailable_for_disabled_entitlement,omitempty"` - UnavailableForNonEnterpriseTier bool `json:"unavailable_for_non_enterprise_tier,omitempty"` -} - type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime struct { - Hours int `json:"hours,omitempty"` - Minutes int `json:"minutes,omitempty"` + Hours int `json:"hours"` + Minutes int `json:"minutes"` } type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedSchedule struct { - DayOfWeek string `json:"day_of_week,omitempty"` - Frequency string `json:"frequency,omitempty"` + DayOfWeek string `json:"day_of_week"` + Frequency string `json:"frequency"` WindowStartTime *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindowWeekDayBasedScheduleWindowStartTime `json:"window_start_time,omitempty"` } @@ -25,9 +19,9 @@ type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspa type ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspace struct { CanToggle bool `json:"can_toggle,omitempty"` - Enabled bool `json:"enabled,omitempty"` + Enabled bool `json:"enabled"` + EnablementDetails []any `json:"enablement_details,omitempty"` RestartEvenIfNoUpdatesAvailable bool `json:"restart_even_if_no_updates_available,omitempty"` - EnablementDetails *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceEnablementDetails `json:"enablement_details,omitempty"` MaintenanceWindow *ResourceAutomaticClusterUpdateWorkspaceSettingAutomaticClusterUpdateWorkspaceMaintenanceWindow `json:"maintenance_window,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_cluster.go b/bundle/internal/tf/schema/resource_cluster.go index e4106d049..4ae063c89 100644 --- a/bundle/internal/tf/schema/resource_cluster.go +++ b/bundle/internal/tf/schema/resource_cluster.go @@ -176,6 +176,7 @@ type ResourceCluster struct { IdempotencyToken string `json:"idempotency_token,omitempty"` InstancePoolId string `json:"instance_pool_id,omitempty"` IsPinned bool `json:"is_pinned,omitempty"` + NoWait bool `json:"no_wait,omitempty"` NodeTypeId string `json:"node_type_id,omitempty"` NumWorkers int `json:"num_workers,omitempty"` PolicyId string `json:"policy_id,omitempty"` diff --git a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go index 50815f753..8265adaed 100644 --- a/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_compliance_security_profile_workspace_setting.go @@ -3,8 +3,8 @@ package schema type ResourceComplianceSecurityProfileWorkspaceSettingComplianceSecurityProfileWorkspace struct { - ComplianceStandards []string `json:"compliance_standards,omitempty"` - IsEnabled bool `json:"is_enabled,omitempty"` + ComplianceStandards []string `json:"compliance_standards"` + IsEnabled bool `json:"is_enabled"` } type ResourceComplianceSecurityProfileWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go 
b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go index 2f552402a..e9c3b0abb 100644 --- a/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go +++ b/bundle/internal/tf/schema/resource_enhanced_security_monitoring_workspace_setting.go @@ -3,7 +3,7 @@ package schema type ResourceEnhancedSecurityMonitoringWorkspaceSettingEnhancedSecurityMonitoringWorkspace struct { - IsEnabled bool `json:"is_enabled,omitempty"` + IsEnabled bool `json:"is_enabled"` } type ResourceEnhancedSecurityMonitoringWorkspaceSetting struct { diff --git a/bundle/internal/tf/schema/resource_model_serving.go b/bundle/internal/tf/schema/resource_model_serving.go index 379807a5d..29d55cd5f 100644 --- a/bundle/internal/tf/schema/resource_model_serving.go +++ b/bundle/internal/tf/schema/resource_model_serving.go @@ -95,14 +95,16 @@ type ResourceModelServingConfigServedEntities struct { } type ResourceModelServingConfigServedModels struct { - EnvironmentVars map[string]string `json:"environment_vars,omitempty"` - InstanceProfileArn string `json:"instance_profile_arn,omitempty"` - ModelName string `json:"model_name"` - ModelVersion string `json:"model_version"` - Name string `json:"name,omitempty"` - ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` - WorkloadSize string `json:"workload_size"` - WorkloadType string `json:"workload_type,omitempty"` + EnvironmentVars map[string]string `json:"environment_vars,omitempty"` + InstanceProfileArn string `json:"instance_profile_arn,omitempty"` + MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"` + MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"` + ModelName string `json:"model_name"` + ModelVersion string `json:"model_version"` + Name string `json:"name,omitempty"` + ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` + WorkloadSize string `json:"workload_size,omitempty"` + WorkloadType string `json:"workload_type,omitempty"` } type ResourceModelServingConfigTrafficConfigRoutes struct { diff --git a/bundle/internal/tf/schema/resource_share.go b/bundle/internal/tf/schema/resource_share.go index e531e7770..37f4d4546 100644 --- a/bundle/internal/tf/schema/resource_share.go +++ b/bundle/internal/tf/schema/resource_share.go @@ -18,20 +18,27 @@ type ResourceShareObject struct { AddedBy string `json:"added_by,omitempty"` CdfEnabled bool `json:"cdf_enabled,omitempty"` Comment string `json:"comment,omitempty"` + Content string `json:"content,omitempty"` DataObjectType string `json:"data_object_type"` HistoryDataSharingStatus string `json:"history_data_sharing_status,omitempty"` Name string `json:"name"` SharedAs string `json:"shared_as,omitempty"` StartVersion int `json:"start_version,omitempty"` Status string `json:"status,omitempty"` + StringSharedAs string `json:"string_shared_as,omitempty"` Partition []ResourceShareObjectPartition `json:"partition,omitempty"` } type ResourceShare struct { - CreatedAt int `json:"created_at,omitempty"` - CreatedBy string `json:"created_by,omitempty"` - Id string `json:"id,omitempty"` - Name string `json:"name"` - Owner string `json:"owner,omitempty"` - Object []ResourceShareObject `json:"object,omitempty"` + Comment string `json:"comment,omitempty"` + CreatedAt int `json:"created_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + Id string `json:"id,omitempty"` + Name string `json:"name"` + Owner string `json:"owner,omitempty"` + StorageLocation string `json:"storage_location,omitempty"` + StorageRoot 
string `json:"storage_root,omitempty"` + UpdatedAt int `json:"updated_at,omitempty"` + UpdatedBy string `json:"updated_by,omitempty"` + Object []ResourceShareObject `json:"object,omitempty"` } diff --git a/bundle/internal/tf/schema/resource_sql_table.go b/bundle/internal/tf/schema/resource_sql_table.go index 51fb3bc0d..4f305c52e 100644 --- a/bundle/internal/tf/schema/resource_sql_table.go +++ b/bundle/internal/tf/schema/resource_sql_table.go @@ -15,6 +15,7 @@ type ResourceSqlTable struct { ClusterKeys []string `json:"cluster_keys,omitempty"` Comment string `json:"comment,omitempty"` DataSourceFormat string `json:"data_source_format,omitempty"` + EffectiveProperties map[string]string `json:"effective_properties,omitempty"` Id string `json:"id,omitempty"` Name string `json:"name"` Options map[string]string `json:"options,omitempty"` diff --git a/bundle/internal/tf/schema/root.go b/bundle/internal/tf/schema/root.go index ebdb7f095..5fc34d6b4 100644 --- a/bundle/internal/tf/schema/root.go +++ b/bundle/internal/tf/schema/root.go @@ -21,7 +21,7 @@ type Root struct { const ProviderHost = "registry.terraform.io" const ProviderSource = "databricks/databricks" -const ProviderVersion = "1.50.0" +const ProviderVersion = "1.52.0" func NewRoot() *Root { return &Root{ From 6c57683dc6077282dd95e03b19396f602dd5d635 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Sat, 21 Sep 2024 08:36:47 +0200 Subject: [PATCH 05/21] Reduce time until the prompt is shown for bundle run (#1727) ## Summary Makes the `databricks bundle run` command use local state before showing the menu prompt, which makes it show more quickly. For large/busy workspaces this means the prompt can show 2-3 seconds earlier. --- cmd/bundle/run.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cmd/bundle/run.go b/cmd/bundle/run.go index 63458f85c..9ef5eb8ff 100644 --- a/cmd/bundle/run.go +++ b/cmd/bundle/run.go @@ -55,13 +55,7 @@ task or a Python wheel task, the second example applies. return diags.Error() } - diags = bundle.Apply(ctx, b, bundle.Seq( - phases.Initialize(), - terraform.Interpolate(), - terraform.Write(), - terraform.StatePull(), - terraform.Load(terraform.ErrorOnEmptyState), - )) + diags = bundle.Apply(ctx, b, phases.Initialize()) if err := diags.Error(); err != nil { return err } @@ -84,6 +78,16 @@ task or a Python wheel task, the second example applies. return fmt.Errorf("expected a KEY of the resource to run") } + diags = bundle.Apply(ctx, b, bundle.Seq( + terraform.Interpolate(), + terraform.Write(), + terraform.StatePull(), + terraform.Load(terraform.ErrorOnEmptyState), + )) + if err := diags.Error(); err != nil { + return err + } + runner, err := run.Find(b, args[0]) if err != nil { return err From 7665c639bd34392f5d95c177b520d48b9ffa40f4 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 23 Sep 2024 11:52:04 +0200 Subject: [PATCH 06/21] Use Unity Catalog for pipelines in the default-python template (#1766) ## Summary Enables Unity Catalog for pipelines in the default template. Pipelines will default to non-Unity Catalog pipelines if a catalog is not specified. *Small caveat*: there are cases where admins lock down the default catalog of a workspace and don't allow the creation of a new schema there. If that happens, the pipeline would fail at runtime with a clear error indicating what happened. ("PERMISSION_DENIED: User does not have CREATE SCHEMA on Catalog 'main'."). 
I've seen this with an internal Databricks workspace, where creating new non-UC schemas wasn't locked down, but creation in the `main` was. ## Testing - Validated on a non-UC + UC workspace. The catalog selection logic here is the same as applied for the SQL templates. --- .../resources/{{.project_name}}_pipeline.yml.tmpl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl index 4b8f74d17..bf4690461 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl @@ -3,6 +3,12 @@ resources: pipelines: {{.project_name}}_pipeline: name: {{.project_name}}_pipeline + {{- if eq default_catalog ""}} + ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog: + # catalog: catalog_name + {{- else}} + catalog: {{default_catalog}} + {{- end}} target: {{.project_name}}_${bundle.environment} libraries: - notebook: From ac80d3dfcb648c26c131ad21aec45449ac5c3b31 Mon Sep 17 00:00:00 2001 From: Ilia Babanov Date: Mon, 23 Sep 2024 12:09:11 +0200 Subject: [PATCH 07/21] Add verbose flag to the "bundle deploy" command (#1774) ## Changes - Extract sync output logic from `cmd/sync` into `lib/sync` - Add hidden `verbose` flag to the `bundle deploy` command, it's false by default and hidden from the `--help` output - Pass output handler to the `deploy/files/upload` mutator if the verbose option is true The was an idea to use in-place output overriding each past file sync event in the output, bit that wont work for the extension, since it doesn't display deploy logs in the terminal. Example output: ``` ~/tmp/defpy: ~/cli/cli bundle deploy --sync-progress Building defpy... Uploading defpy-0.0.1+20240917.112755-py3-none-any.whl... Uploading bundle files to /Users/ilia.babanov@databricks.com/.bundle/defpy/dev/files... Action: PUT: requirements-dev.txt, resources/defpy_pipeline.yml, pytest.ini, src/defpy/main.py, src/defpy/__init__.py, src/dlt_pipeline.ipynb, tests/main_test.py, src/notebook.ipynb, setup.py, resources/defpy_job.yml, .vscode/extensions.json, .vscode/settings.json, fixtures/.gitkeep, .vscode/__builtins__.pyi, README.md, .gitignore, databricks.yml Uploaded tests Uploaded resources Uploaded fixtures Uploaded .vscode Uploaded src/defpy Uploaded requirements-dev.txt Uploaded .gitignore Uploaded fixtures/.gitkeep Uploaded src/defpy/__init__.py Uploaded databricks.yml Uploaded README.md Uploaded setup.py Uploaded .vscode/__builtins__.pyi Uploaded .vscode/extensions.json Uploaded src/dlt_pipeline.ipynb Uploaded .vscode/settings.json Uploaded resources/defpy_job.yml Uploaded pytest.ini Uploaded src/defpy/main.py Uploaded tests/main_test.py Uploaded resources/defpy_pipeline.yml Uploaded src/notebook.ipynb Initial Sync Complete Deploying resources... Updating deployment state... Deployment complete! 
``` Output example in the extension: Screenshot 2024-09-19 at 11 07 48 ## Tests Manually for the `sync` and `bundle deploy` commands + vscode extension sync and deploy flows --- bundle/deploy/files/upload.go | 18 ++++++++++---- bundle/phases/deploy.go | 5 ++-- cmd/bundle/deploy.go | 14 ++++++++++- cmd/sync/sync.go | 38 ++++++++++++++--------------- {cmd => libs}/sync/output.go | 6 ++--- libs/sync/sync.go | 45 +++++++++++++++++++++++++---------- 6 files changed, 82 insertions(+), 44 deletions(-) rename {cmd => libs}/sync/output.go (83%) diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index 2c126623e..77b83611b 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -8,9 +8,12 @@ import ( "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/sync" ) -type upload struct{} +type upload struct { + outputHandler sync.OutputHandler +} func (m *upload) Name() string { return "files.Upload" @@ -18,11 +21,18 @@ func (m *upload) Name() string { func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { cmdio.LogString(ctx, fmt.Sprintf("Uploading bundle files to %s...", b.Config.Workspace.FilePath)) - sync, err := GetSync(ctx, bundle.ReadOnly(b)) + opts, err := GetSyncOptions(ctx, bundle.ReadOnly(b)) if err != nil { return diag.FromErr(err) } + opts.OutputHandler = m.outputHandler + sync, err := sync.New(ctx, *opts) + if err != nil { + return diag.FromErr(err) + } + defer sync.Close() + b.Files, err = sync.RunOnce(ctx) if err != nil { return diag.FromErr(err) @@ -32,6 +42,6 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return nil } -func Upload() bundle.Mutator { - return &upload{} +func Upload(outputHandler sync.OutputHandler) bundle.Mutator { + return &upload{outputHandler} } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 49544227e..097c561eb 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -18,6 +18,7 @@ import ( "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/sync" terraformlib "github.com/databricks/cli/libs/terraform" tfjson "github.com/hashicorp/terraform-json" ) @@ -128,7 +129,7 @@ properties such as the 'catalog' or 'storage' are changed:` } // The deploy phase deploys artifacts and resources. -func Deploy() bundle.Mutator { +func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { // Core mutators that CRUD resources and modify deployment state. These // mutators need informed consent if they are potentially destructive. 
deployCore := bundle.Defer( @@ -157,7 +158,7 @@ func Deploy() bundle.Mutator { libraries.ExpandGlobReferences(), libraries.Upload(), python.TransformWheelTask(), - files.Upload(), + files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 1166875ab..492317347 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/bundle/utils" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/sync" "github.com/spf13/cobra" ) @@ -25,11 +26,15 @@ func newDeployCommand() *cobra.Command { var failOnActiveRuns bool var computeID string var autoApprove bool + var verbose bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") + cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") + // Verbose flag currently only affects file sync output, it's used by the vscode extension + cmd.Flags().MarkHidden("verbose") cmd.RunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() @@ -51,11 +56,18 @@ func newDeployCommand() *cobra.Command { return nil }) + var outputHandler sync.OutputHandler + if verbose { + outputHandler = func(ctx context.Context, c <-chan sync.Event) { + sync.TextOutput(ctx, c, cmd.OutOrStdout()) + } + } + diags = diags.Extend( bundle.Apply(ctx, b, bundle.Seq( phases.Initialize(), phases.Build(), - phases.Deploy(), + phases.Deploy(outputHandler), )), ) } diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index 23a4c018f..2092d9e33 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "path/filepath" - stdsync "sync" "time" "github.com/databricks/cli/bundle" @@ -46,6 +45,21 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn return nil, flag.ErrHelp } + var outputFunc func(context.Context, <-chan sync.Event, io.Writer) + switch f.output { + case flags.OutputText: + outputFunc = sync.TextOutput + case flags.OutputJSON: + outputFunc = sync.JsonOutput + } + + var outputHandler sync.OutputHandler + if outputFunc != nil { + outputHandler = func(ctx context.Context, events <-chan sync.Event) { + outputFunc(ctx, events, cmd.OutOrStdout()) + } + } + opts := sync.SyncOptions{ LocalRoot: vfs.MustNew(args[0]), Paths: []string{"."}, @@ -62,6 +76,8 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn // exist and add it to the `.gitignore` file in the root. 
SnapshotBasePath: filepath.Join(args[0], ".databricks"), WorkspaceClient: root.WorkspaceClient(cmd.Context()), + + OutputHandler: outputHandler, } return &opts, nil } @@ -118,23 +134,7 @@ func New() *cobra.Command { if err != nil { return err } - - var outputFunc func(context.Context, <-chan sync.Event, io.Writer) - switch f.output { - case flags.OutputText: - outputFunc = textOutput - case flags.OutputJSON: - outputFunc = jsonOutput - } - - var wg stdsync.WaitGroup - if outputFunc != nil { - wg.Add(1) - go func() { - defer wg.Done() - outputFunc(ctx, s.Events(), cmd.OutOrStdout()) - }() - } + defer s.Close() if f.watch { err = s.RunContinuous(ctx) @@ -142,8 +142,6 @@ func New() *cobra.Command { _, err = s.RunOnce(ctx) } - s.Close() - wg.Wait() return err } diff --git a/cmd/sync/output.go b/libs/sync/output.go similarity index 83% rename from cmd/sync/output.go rename to libs/sync/output.go index 2785343f9..c01b25ef6 100644 --- a/cmd/sync/output.go +++ b/libs/sync/output.go @@ -5,12 +5,10 @@ import ( "context" "encoding/json" "io" - - "github.com/databricks/cli/libs/sync" ) // Read synchronization events and write them as JSON to the specified writer (typically stdout). -func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func JsonOutput(ctx context.Context, ch <-chan Event, w io.Writer) { enc := json.NewEncoder(w) for { select { @@ -31,7 +29,7 @@ func jsonOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { } // Read synchronization events and write them as text to the specified writer (typically stdout). -func textOutput(ctx context.Context, ch <-chan sync.Event, w io.Writer) { +func TextOutput(ctx context.Context, ch <-chan Event, w io.Writer) { bw := bufio.NewWriter(w) for { diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 9eaebf2ad..cc9c73944 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -3,6 +3,7 @@ package sync import ( "context" "fmt" + stdsync "sync" "time" "github.com/databricks/cli/libs/filer" @@ -15,6 +16,8 @@ import ( "github.com/databricks/databricks-sdk-go/service/iam" ) +type OutputHandler func(context.Context, <-chan Event) + type SyncOptions struct { LocalRoot vfs.Path Paths []string @@ -34,6 +37,8 @@ type SyncOptions struct { CurrentUser *iam.User Host string + + OutputHandler OutputHandler } type Sync struct { @@ -49,6 +54,10 @@ type Sync struct { // Synchronization progress events are sent to this event notifier. notifier EventNotifier seq int + + // WaitGroup is automatically created when an output handler is provided in the SyncOptions. + // Close call is required to ensure the output handler goroutine handles all events in time. + outputWaitGroup *stdsync.WaitGroup } // New initializes and returns a new [Sync] instance. 
@@ -106,31 +115,41 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { return nil, err } + var notifier EventNotifier + var outputWaitGroup = &stdsync.WaitGroup{} + if opts.OutputHandler != nil { + ch := make(chan Event, MaxRequestsInFlight) + notifier = &ChannelNotifier{ch} + outputWaitGroup.Add(1) + go func() { + defer outputWaitGroup.Done() + opts.OutputHandler(ctx, ch) + }() + } else { + notifier = &NopNotifier{} + } + return &Sync{ SyncOptions: &opts, - fileSet: fileSet, - includeFileSet: includeFileSet, - excludeFileSet: excludeFileSet, - snapshot: snapshot, - filer: filer, - notifier: &NopNotifier{}, - seq: 0, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + snapshot: snapshot, + filer: filer, + notifier: notifier, + outputWaitGroup: outputWaitGroup, + seq: 0, }, nil } -func (s *Sync) Events() <-chan Event { - ch := make(chan Event, MaxRequestsInFlight) - s.notifier = &ChannelNotifier{ch} - return ch -} - func (s *Sync) Close() { if s.notifier == nil { return } s.notifier.Close() s.notifier = nil + s.outputWaitGroup.Wait() } func (s *Sync) notifyStart(ctx context.Context, d diff) { From 56ed9bebf39b2ef6430e42dbb840684285217436 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Mon, 23 Sep 2024 12:42:34 +0200 Subject: [PATCH 08/21] Added support for creating all-purpose clusters (#1698) ## Changes Added support for creating all-purpose clusters Example of configuration ``` bundle: name: clusters resources: clusters: test_cluster: cluster_name: "Test Cluster" num_workers: 2 node_type_id: "i3.xlarge" autoscale: min_workers: 2 max_workers: 7 spark_version: "13.3.x-scala2.12" spark_conf: "spark.executor.memory": "2g" jobs: test_job: name: "Test Job" tasks: - task_key: test_task existing_cluster_id: ${resources.clusters.test_cluster.id} notebook_task: notebook_path: "./src/test.py" targets: development: mode: development compute_id: ${resources.clusters.test_cluster.id} ``` ## Tests Added unit, config and E2E tests --- bundle/config/bundle.go | 7 +- bundle/config/mutator/apply_presets.go | 15 +++ bundle/config/mutator/compute_id_compat.go | 87 +++++++++++++++++ .../config/mutator/compute_id_compate_test.go | 57 +++++++++++ bundle/config/mutator/mutator.go | 1 + bundle/config/mutator/override_compute.go | 8 +- .../config/mutator/override_compute_test.go | 4 +- .../mutator/process_target_mode_test.go | 10 ++ bundle/config/mutator/run_as_test.go | 2 + bundle/config/resources.go | 1 + bundle/config/resources/clusters.go | 39 ++++++++ bundle/config/root.go | 6 +- bundle/config/target.go | 7 +- bundle/deploy/terraform/convert.go | 22 +++++ bundle/deploy/terraform/convert_test.go | 56 +++++++++++ bundle/deploy/terraform/interpolate.go | 2 + bundle/deploy/terraform/interpolate_test.go | 2 + .../deploy/terraform/tfdyn/convert_cluster.go | 52 ++++++++++ .../terraform/tfdyn/convert_cluster_test.go | 97 +++++++++++++++++++ bundle/tests/clusters/databricks.yml | 36 +++++++ bundle/tests/clusters_test.go | 36 +++++++ cmd/bundle/deploy.go | 11 ++- .../clusters/databricks_template_schema.json | 16 +++ .../clusters/template/databricks.yml.tmpl | 24 +++++ .../bundles/clusters/template/hello_world.py | 1 + internal/bundle/clusters_test.go | 56 +++++++++++ internal/testutil/cloud.go | 4 + 27 files changed, 643 insertions(+), 16 deletions(-) create mode 100644 bundle/config/mutator/compute_id_compat.go create mode 100644 bundle/config/mutator/compute_id_compate_test.go create mode 100644 bundle/config/resources/clusters.go create mode 100644 
bundle/deploy/terraform/tfdyn/convert_cluster.go create mode 100644 bundle/deploy/terraform/tfdyn/convert_cluster_test.go create mode 100644 bundle/tests/clusters/databricks.yml create mode 100644 bundle/tests/clusters_test.go create mode 100644 internal/bundle/bundles/clusters/databricks_template_schema.json create mode 100644 internal/bundle/bundles/clusters/template/databricks.yml.tmpl create mode 100644 internal/bundle/bundles/clusters/template/hello_world.py create mode 100644 internal/bundle/clusters_test.go diff --git a/bundle/config/bundle.go b/bundle/config/bundle.go index 78648dfd7..f533c4d18 100644 --- a/bundle/config/bundle.go +++ b/bundle/config/bundle.go @@ -38,8 +38,11 @@ type Bundle struct { // Annotated readonly as this should be set at the target level. Mode Mode `json:"mode,omitempty" bundle:"readonly"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. + ClusterId string `json:"cluster_id,omitempty"` // Deployment section specifies deployment related configuration for bundle Deployment Deployment `json:"deployment,omitempty"` diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index 28d015c10..27af82e54 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -160,6 +160,21 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos // the Databricks UI and via the SQL API. } + // Clusters: Prefix, Tags + for _, c := range r.Clusters { + c.ClusterName = prefix + c.ClusterName + if c.CustomTags == nil { + c.CustomTags = make(map[string]string) + } + for _, tag := range tags { + normalisedKey := b.Tagging.NormalizeKey(tag.Key) + normalisedValue := b.Tagging.NormalizeValue(tag.Value) + if _, ok := c.CustomTags[normalisedKey]; !ok { + c.CustomTags[normalisedKey] = normalisedValue + } + } + } + return nil } diff --git a/bundle/config/mutator/compute_id_compat.go b/bundle/config/mutator/compute_id_compat.go new file mode 100644 index 000000000..3afe02e9e --- /dev/null +++ b/bundle/config/mutator/compute_id_compat.go @@ -0,0 +1,87 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type computeIdToClusterId struct{} + +func ComputeIdToClusterId() bundle.Mutator { + return &computeIdToClusterId{} +} + +func (m *computeIdToClusterId) Name() string { + return "ComputeIdToClusterId" +} + +func (m *computeIdToClusterId) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // The "compute_id" key is set; rewrite it to "cluster_id". + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.NewPath(dyn.Key("bundle"))) + diags = diags.Extend(d) + + // Check if the "compute_id" key is set in any target overrides. 
+ return dyn.MapByPattern(v, dyn.NewPattern(dyn.Key("targets"), dyn.AnyKey()), func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + v, d := rewriteComputeIdToClusterId(v, dyn.Path{}) + diags = diags.Extend(d) + return v, nil + }) + }) + + diags = diags.Extend(diag.FromErr(err)) + return diags +} + +func rewriteComputeIdToClusterId(v dyn.Value, p dyn.Path) (dyn.Value, diag.Diagnostics) { + var diags diag.Diagnostics + computeIdPath := p.Append(dyn.Key("compute_id")) + computeId, err := dyn.GetByPath(v, computeIdPath) + + // If the "compute_id" key is not set, we don't need to do anything. + if err != nil { + return v, nil + } + + if computeId.Kind() == dyn.KindInvalid { + return v, nil + } + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Warning, + Summary: "compute_id is deprecated, please use cluster_id instead", + Locations: computeId.Locations(), + Paths: []dyn.Path{computeIdPath}, + }) + + clusterIdPath := p.Append(dyn.Key("cluster_id")) + nv, err := dyn.SetByPath(v, clusterIdPath, computeId) + if err != nil { + return dyn.InvalidValue, diag.FromErr(err) + } + // Drop the "compute_id" key. + vout, err := dyn.Walk(nv, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + switch len(p) { + case 0: + return v, nil + case 1: + if p[0] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + return v, nil + case 2: + if p[1] == dyn.Key("compute_id") { + return v, dyn.ErrDrop + } + } + return v, dyn.ErrSkip + }) + + diags = diags.Extend(diag.FromErr(err)) + return vout, diags +} diff --git a/bundle/config/mutator/compute_id_compate_test.go b/bundle/config/mutator/compute_id_compate_test.go new file mode 100644 index 000000000..e59d37e39 --- /dev/null +++ b/bundle/config/mutator/compute_id_compate_test.go @@ -0,0 +1,57 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/stretchr/testify/assert" +) + +func TestComputeIdToClusterId(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Bundle: config.Bundle{ + ComputeId: "compute-id", + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "compute-id", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} + +func TestComputeIdToClusterIdInTargetOverride(t *testing.T) { + b := &bundle.Bundle{ + Config: config.Root{ + Targets: map[string]*config.Target{ + "dev": { + ComputeId: "compute-id-dev", + }, + }, + }, + } + + diags := bundle.Apply(context.Background(), b, mutator.ComputeIdToClusterId()) + assert.NoError(t, diags.Error()) + assert.Empty(t, b.Config.Targets["dev"].ComputeId) + + diags = diags.Extend(bundle.Apply(context.Background(), b, mutator.SelectTarget("dev"))) + assert.NoError(t, diags.Error()) + + assert.Equal(t, "compute-id-dev", b.Config.Bundle.ClusterId) + assert.Empty(t, b.Config.Bundle.ComputeId) + + assert.Len(t, diags, 1) + assert.Equal(t, "compute_id is deprecated, please use cluster_id instead", diags[0].Summary) + assert.Equal(t, diag.Warning, diags[0].Severity) +} diff --git a/bundle/config/mutator/mutator.go b/bundle/config/mutator/mutator.go index 0458beff4..faf50ae6e 100644 --- a/bundle/config/mutator/mutator.go +++ 
b/bundle/config/mutator/mutator.go @@ -23,6 +23,7 @@ func DefaultMutators() []bundle.Mutator { VerifyCliVersion(), EnvironmentsToTargets(), + ComputeIdToClusterId(), InitializeVariables(), DefineDefaultTarget(), LoadGitDetails(), diff --git a/bundle/config/mutator/override_compute.go b/bundle/config/mutator/override_compute.go index 73fbad364..5700cdf26 100644 --- a/bundle/config/mutator/override_compute.go +++ b/bundle/config/mutator/override_compute.go @@ -39,22 +39,22 @@ func overrideJobCompute(j *resources.Job, compute string) { func (m *overrideCompute) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { if b.Config.Bundle.Mode != config.Development { - if b.Config.Bundle.ComputeID != "" { + if b.Config.Bundle.ClusterId != "" { return diag.Errorf("cannot override compute for an target that does not use 'mode: development'") } return nil } if v := env.Get(ctx, "DATABRICKS_CLUSTER_ID"); v != "" { - b.Config.Bundle.ComputeID = v + b.Config.Bundle.ClusterId = v } - if b.Config.Bundle.ComputeID == "" { + if b.Config.Bundle.ClusterId == "" { return nil } r := b.Config.Resources for i := range r.Jobs { - overrideJobCompute(r.Jobs[i], b.Config.Bundle.ComputeID) + overrideJobCompute(r.Jobs[i], b.Config.Bundle.ClusterId) } return nil diff --git a/bundle/config/mutator/override_compute_test.go b/bundle/config/mutator/override_compute_test.go index 152ee543e..369447d7e 100644 --- a/bundle/config/mutator/override_compute_test.go +++ b/bundle/config/mutator/override_compute_test.go @@ -20,7 +20,7 @@ func TestOverrideDevelopment(t *testing.T) { Config: config.Root{ Bundle: config.Bundle{ Mode: config.Development, - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -144,7 +144,7 @@ func TestOverrideProduction(t *testing.T) { b := &bundle.Bundle{ Config: config.Root{ Bundle: config.Bundle{ - ComputeID: "newClusterID", + ClusterId: "newClusterID", }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 42f1929c8..b0eb57ee1 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -13,6 +13,7 @@ import ( "github.com/databricks/cli/libs/tags" sdkconfig "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/service/catalog" + "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/iam" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/databricks/databricks-sdk-go/service/ml" @@ -119,6 +120,9 @@ func mockBundle(mode config.Mode) *bundle.Bundle { Schemas: map[string]*resources.Schema{ "schema1": {CreateSchema: &catalog.CreateSchema{Name: "schema1"}}, }, + Clusters: map[string]*resources.Cluster{ + "cluster1": {ClusterSpec: &compute.ClusterSpec{ClusterName: "cluster1", SparkVersion: "13.2.x", NumWorkers: 1}}, + }, }, }, // Use AWS implementation for testing. 
@@ -177,6 +181,9 @@ func TestProcessTargetModeDevelopment(t *testing.T) { // Schema 1 assert.Equal(t, "dev_lennart_schema1", b.Config.Resources.Schemas["schema1"].Name) + + // Clusters + assert.Equal(t, "[dev lennart] cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeDevelopmentTagNormalizationForAws(t *testing.T) { @@ -281,6 +288,7 @@ func TestProcessTargetModeDefault(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProduction(t *testing.T) { @@ -312,6 +320,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Resources.Experiments["experiment2"].Permissions = permissions b.Config.Resources.Models["model1"].Permissions = permissions b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Permissions = permissions + b.Config.Resources.Clusters["cluster1"].Permissions = permissions diags = validateProductionMode(context.Background(), b, false) require.NoError(t, diags.Error()) @@ -322,6 +331,7 @@ func TestProcessTargetModeProduction(t *testing.T) { assert.Equal(t, "servingendpoint1", b.Config.Resources.ModelServingEndpoints["servingendpoint1"].Name) assert.Equal(t, "registeredmodel1", b.Config.Resources.RegisteredModels["registeredmodel1"].Name) assert.Equal(t, "qualityMonitor1", b.Config.Resources.QualityMonitors["qualityMonitor1"].TableName) + assert.Equal(t, "cluster1", b.Config.Resources.Clusters["cluster1"].ClusterName) } func TestProcessTargetModeProductionOkForPrincipal(t *testing.T) { diff --git a/bundle/config/mutator/run_as_test.go b/bundle/config/mutator/run_as_test.go index e6cef9ba4..abeea45d0 100644 --- a/bundle/config/mutator/run_as_test.go +++ b/bundle/config/mutator/run_as_test.go @@ -32,6 +32,7 @@ func allResourceTypes(t *testing.T) []string { // the dyn library gives us the correct list of all resources supported. Please // also update this check when adding a new resource require.Equal(t, []string{ + "clusters", "experiments", "jobs", "model_serving_endpoints", @@ -133,6 +134,7 @@ func TestRunAsErrorForUnsupportedResources(t *testing.T) { // some point in the future. These resources are (implicitly) on the deny list, since // they are not on the allow list below. 
allowList := []string{ + "clusters", "jobs", "models", "registered_models", diff --git a/bundle/config/resources.go b/bundle/config/resources.go index 22d69ffb5..a3afb7fc3 100644 --- a/bundle/config/resources.go +++ b/bundle/config/resources.go @@ -19,6 +19,7 @@ type Resources struct { RegisteredModels map[string]*resources.RegisteredModel `json:"registered_models,omitempty"` QualityMonitors map[string]*resources.QualityMonitor `json:"quality_monitors,omitempty"` Schemas map[string]*resources.Schema `json:"schemas,omitempty"` + Clusters map[string]*resources.Cluster `json:"clusters,omitempty"` } type ConfigResource interface { diff --git a/bundle/config/resources/clusters.go b/bundle/config/resources/clusters.go new file mode 100644 index 000000000..632345666 --- /dev/null +++ b/bundle/config/resources/clusters.go @@ -0,0 +1,39 @@ +package resources + +import ( + "context" + + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/marshal" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +type Cluster struct { + ID string `json:"id,omitempty" bundle:"readonly"` + Permissions []Permission `json:"permissions,omitempty"` + ModifiedStatus ModifiedStatus `json:"modified_status,omitempty" bundle:"internal"` + + *compute.ClusterSpec +} + +func (s *Cluster) UnmarshalJSON(b []byte) error { + return marshal.Unmarshal(b, s) +} + +func (s Cluster) MarshalJSON() ([]byte, error) { + return marshal.Marshal(s) +} + +func (s *Cluster) Exists(ctx context.Context, w *databricks.WorkspaceClient, id string) (bool, error) { + _, err := w.Clusters.GetByClusterId(ctx, id) + if err != nil { + log.Debugf(ctx, "cluster %s does not exist", id) + return false, err + } + return true, nil +} + +func (s *Cluster) TerraformResourceName() string { + return "databricks_cluster" +} diff --git a/bundle/config/root.go b/bundle/config/root.go index 884c2e1ca..92d834f0a 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -366,9 +366,9 @@ func (r *Root) MergeTargetOverrides(name string) error { } } - // Merge `compute_id`. This field must be overwritten if set, not merged. - if v := target.Get("compute_id"); v.Kind() != dyn.KindInvalid { - root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("compute_id")), v) + // Merge `cluster_id`. This field must be overwritten if set, not merged. + if v := target.Get("cluster_id"); v.Kind() != dyn.KindInvalid { + root, err = dyn.SetByPath(root, dyn.NewPath(dyn.Key("bundle"), dyn.Key("cluster_id")), v) if err != nil { return err } diff --git a/bundle/config/target.go b/bundle/config/target.go index fc6ba7b5b..fae9c940b 100644 --- a/bundle/config/target.go +++ b/bundle/config/target.go @@ -24,8 +24,11 @@ type Target struct { // name prefix of deployed resources. Presets Presets `json:"presets,omitempty"` - // Overrides the compute used for jobs and other supported assets. - ComputeID string `json:"compute_id,omitempty"` + // DEPRECATED: Overrides the compute used for jobs and other supported assets. + ComputeId string `json:"compute_id,omitempty"` + + // Overrides the cluster used for jobs and other supported assets. 
+ ClusterId string `json:"cluster_id,omitempty"` Bundle *Bundle `json:"bundle,omitempty"` diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index f13c241ce..5a548e3b5 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -231,6 +231,13 @@ func BundleToTerraform(config *config.Root) *schema.Root { tfroot.Resource.QualityMonitor[k] = &dst } + for k, src := range config.Resources.Clusters { + noResources = false + var dst schema.ResourceCluster + conv(src, &dst) + tfroot.Resource.Cluster[k] = &dst + } + // We explicitly set "resource" to nil to omit it from a JSON encoding. // This is required because the terraform CLI requires >= 1 resources defined // if the "resource" property is used in a .tf.json file. @@ -394,6 +401,16 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { } cur.ID = instance.Attributes.ID config.Resources.Schemas[resource.Name] = cur + case "databricks_cluster": + if config.Resources.Clusters == nil { + config.Resources.Clusters = make(map[string]*resources.Cluster) + } + cur := config.Resources.Clusters[resource.Name] + if cur == nil { + cur = &resources.Cluster{ModifiedStatus: resources.ModifiedStatusDeleted} + } + cur.ID = instance.Attributes.ID + config.Resources.Clusters[resource.Name] = cur case "databricks_permissions": case "databricks_grants": // Ignore; no need to pull these back into the configuration. @@ -443,6 +460,11 @@ func TerraformToBundle(state *resourcesState, config *config.Root) error { src.ModifiedStatus = resources.ModifiedStatusCreated } } + for _, src := range config.Resources.Clusters { + if src.ModifiedStatus == "" && src.ID == "" { + src.ModifiedStatus = resources.ModifiedStatusCreated + } + } return nil } diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index e4ef6114a..4c6866d9d 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -663,6 +663,14 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "1"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -692,6 +700,9 @@ func TestTerraformToBundleEmptyLocalResources(t *testing.T) { assert.Equal(t, "1", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } @@ -754,6 +765,13 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + }, }, } var tfState = resourcesState{ @@ -786,6 +804,9 @@ func TestTerraformToBundleEmptyRemoteResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster"].ModifiedStatus) + 
AssertFullResourceCoverage(t, &config) } @@ -888,6 +909,18 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { }, }, }, + Clusters: map[string]*resources.Cluster{ + "test_cluster": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster", + }, + }, + "test_cluster_new": { + ClusterSpec: &compute.ClusterSpec{ + ClusterName: "test_cluster_new", + }, + }, + }, }, } var tfState = resourcesState{ @@ -1020,6 +1053,22 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { {Attributes: stateInstanceAttributes{ID: "2"}}, }, }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "1"}}, + }, + }, + { + Type: "databricks_cluster", + Mode: "managed", + Name: "test_cluster_old", + Instances: []stateResourceInstance{ + {Attributes: stateInstanceAttributes{ID: "2"}}, + }, + }, }, } err := TerraformToBundle(&tfState, &config) @@ -1081,6 +1130,13 @@ func TestTerraformToBundleModifiedResources(t *testing.T) { assert.Equal(t, "", config.Resources.Schemas["test_schema_new"].ID) assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Schemas["test_schema_new"].ModifiedStatus) + assert.Equal(t, "1", config.Resources.Clusters["test_cluster"].ID) + assert.Equal(t, "", config.Resources.Clusters["test_cluster"].ModifiedStatus) + assert.Equal(t, "2", config.Resources.Clusters["test_cluster_old"].ID) + assert.Equal(t, resources.ModifiedStatusDeleted, config.Resources.Clusters["test_cluster_old"].ModifiedStatus) + assert.Equal(t, "", config.Resources.Clusters["test_cluster_new"].ID) + assert.Equal(t, resources.ModifiedStatusCreated, config.Resources.Clusters["test_cluster_new"].ModifiedStatus) + AssertFullResourceCoverage(t, &config) } diff --git a/bundle/deploy/terraform/interpolate.go b/bundle/deploy/terraform/interpolate.go index faa098e1c..12894c684 100644 --- a/bundle/deploy/terraform/interpolate.go +++ b/bundle/deploy/terraform/interpolate.go @@ -58,6 +58,8 @@ func (m *interpolateMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.D path = dyn.NewPath(dyn.Key("databricks_quality_monitor")).Append(path[2:]...) case dyn.Key("schemas"): path = dyn.NewPath(dyn.Key("databricks_schema")).Append(path[2:]...) + case dyn.Key("clusters"): + path = dyn.NewPath(dyn.Key("databricks_cluster")).Append(path[2:]...) default: // Trigger "key not found" for unknown resource types. 
return dyn.GetByPath(root, path) diff --git a/bundle/deploy/terraform/interpolate_test.go b/bundle/deploy/terraform/interpolate_test.go index 5ceb243bc..630a904ac 100644 --- a/bundle/deploy/terraform/interpolate_test.go +++ b/bundle/deploy/terraform/interpolate_test.go @@ -31,6 +31,7 @@ func TestInterpolate(t *testing.T) { "other_model_serving": "${resources.model_serving_endpoints.other_model_serving.id}", "other_registered_model": "${resources.registered_models.other_registered_model.id}", "other_schema": "${resources.schemas.other_schema.id}", + "other_cluster": "${resources.clusters.other_cluster.id}", }, Tasks: []jobs.Task{ { @@ -67,6 +68,7 @@ func TestInterpolate(t *testing.T) { assert.Equal(t, "${databricks_model_serving.other_model_serving.id}", j.Tags["other_model_serving"]) assert.Equal(t, "${databricks_registered_model.other_registered_model.id}", j.Tags["other_registered_model"]) assert.Equal(t, "${databricks_schema.other_schema.id}", j.Tags["other_schema"]) + assert.Equal(t, "${databricks_cluster.other_cluster.id}", j.Tags["other_cluster"]) m := b.Config.Resources.Models["my_model"] assert.Equal(t, "my_model", m.Model.Name) diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster.go b/bundle/deploy/terraform/tfdyn/convert_cluster.go new file mode 100644 index 000000000..f25f09ea8 --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster.go @@ -0,0 +1,52 @@ +package tfdyn + +import ( + "context" + "fmt" + + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go/service/compute" +) + +func convertClusterResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { + // Normalize the output value to the target schema. + vout, diags := convert.Normalize(compute.ClusterSpec{}, vin) + for _, diag := range diags { + log.Debugf(ctx, "cluster normalization diagnostic: %s", diag.Summary) + } + + return vout, nil +} + +type clusterConverter struct{} + +func (clusterConverter) Convert(ctx context.Context, key string, vin dyn.Value, out *schema.Resources) error { + vout, err := convertClusterResource(ctx, vin) + if err != nil { + return err + } + + // We always set no_wait as it allows DABs not to wait for cluster to be started. + vout, err = dyn.Set(vout, "no_wait", dyn.V(true)) + if err != nil { + return err + } + + // Add the converted resource to the output. + out.Cluster[key] = vout.AsAny() + + // Configure permissions for this resource. 
+ if permissions := convertPermissionsResource(ctx, vin); permissions != nil { + permissions.JobId = fmt.Sprintf("${databricks_cluster.%s.id}", key) + out.Permissions["cluster_"+key] = permissions + } + + return nil +} + +func init() { + registerConverter("clusters", clusterConverter{}) +} diff --git a/bundle/deploy/terraform/tfdyn/convert_cluster_test.go b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go new file mode 100644 index 000000000..e7d2542fd --- /dev/null +++ b/bundle/deploy/terraform/tfdyn/convert_cluster_test.go @@ -0,0 +1,97 @@ +package tfdyn + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/internal/tf/schema" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/convert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestConvertCluster(t *testing.T) { + var src = resources.Cluster{ + ClusterSpec: &compute.ClusterSpec{ + NumWorkers: 3, + SparkVersion: "13.3.x-scala2.12", + ClusterName: "cluster", + SparkConf: map[string]string{ + "spark.executor.memory": "2g", + }, + AwsAttributes: &compute.AwsAttributes{ + Availability: "ON_DEMAND", + }, + AzureAttributes: &compute.AzureAttributes{ + Availability: "SPOT", + }, + DataSecurityMode: "USER_ISOLATION", + NodeTypeId: "m5.xlarge", + Autoscale: &compute.AutoScale{ + MinWorkers: 1, + MaxWorkers: 10, + }, + }, + + Permissions: []resources.Permission{ + { + Level: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + Level: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + } + + vin, err := convert.FromTyped(src, dyn.NilValue) + require.NoError(t, err) + + ctx := context.Background() + out := schema.NewResources() + err = clusterConverter{}.Convert(ctx, "my_cluster", vin, out) + require.NoError(t, err) + + cluster := out.Cluster["my_cluster"] + assert.Equal(t, map[string]any{ + "num_workers": int64(3), + "spark_version": "13.3.x-scala2.12", + "cluster_name": "cluster", + "spark_conf": map[string]any{ + "spark.executor.memory": "2g", + }, + "aws_attributes": map[string]any{ + "availability": "ON_DEMAND", + }, + "azure_attributes": map[string]any{ + "availability": "SPOT", + }, + "data_security_mode": "USER_ISOLATION", + "no_wait": true, + "node_type_id": "m5.xlarge", + "autoscale": map[string]any{ + "min_workers": int64(1), + "max_workers": int64(10), + }, + }, cluster) + + // Assert equality on the permissions + assert.Equal(t, &schema.ResourcePermissions{ + JobId: "${databricks_cluster.my_cluster.id}", + AccessControl: []schema.ResourcePermissionsAccessControl{ + { + PermissionLevel: "CAN_RUN", + UserName: "jack@gmail.com", + }, + { + PermissionLevel: "CAN_MANAGE", + ServicePrincipalName: "sp", + }, + }, + }, out.Permissions["cluster_my_cluster"]) + +} diff --git a/bundle/tests/clusters/databricks.yml b/bundle/tests/clusters/databricks.yml new file mode 100644 index 000000000..1074462a6 --- /dev/null +++ b/bundle/tests/clusters/databricks.yml @@ -0,0 +1,36 @@ +bundle: + name: clusters + +workspace: + host: https://acme.cloud.databricks.com/ + +resources: + clusters: + foo: + cluster_name: foo + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + +targets: + default: + + development: + resources: + clusters: + foo: + cluster_name: foo-override + num_workers: 3 + node_type_id: "m5.xlarge" + autoscale: + 
min_workers: 1 + max_workers: 3 + spark_version: "15.2.x-scala2.12" + spark_conf: + "spark.executor.memory": "4g" + "spark.executor.memory2": "4g" diff --git a/bundle/tests/clusters_test.go b/bundle/tests/clusters_test.go new file mode 100644 index 000000000..def8a2a31 --- /dev/null +++ b/bundle/tests/clusters_test.go @@ -0,0 +1,36 @@ +package config_tests + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestClusters(t *testing.T) { + b := load(t, "./clusters") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo", cluster.ClusterName) + assert.Equal(t, "13.3.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "i3.xlarge", cluster.NodeTypeId) + assert.Equal(t, 2, cluster.NumWorkers) + assert.Equal(t, "2g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, 2, cluster.Autoscale.MinWorkers) + assert.Equal(t, 7, cluster.Autoscale.MaxWorkers) +} + +func TestClustersOverride(t *testing.T) { + b := loadTarget(t, "./clusters", "development") + assert.Equal(t, "clusters", b.Config.Bundle.Name) + + cluster := b.Config.Resources.Clusters["foo"] + assert.Equal(t, "foo-override", cluster.ClusterName) + assert.Equal(t, "15.2.x-scala2.12", cluster.SparkVersion) + assert.Equal(t, "m5.xlarge", cluster.NodeTypeId) + assert.Equal(t, 3, cluster.NumWorkers) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory"]) + assert.Equal(t, "4g", cluster.SparkConf["spark.executor.memory2"]) + assert.Equal(t, 1, cluster.Autoscale.MinWorkers) + assert.Equal(t, 3, cluster.Autoscale.MaxWorkers) +} diff --git a/cmd/bundle/deploy.go b/cmd/bundle/deploy.go index 492317347..f1c85cb3d 100644 --- a/cmd/bundle/deploy.go +++ b/cmd/bundle/deploy.go @@ -24,14 +24,16 @@ func newDeployCommand() *cobra.Command { var force bool var forceLock bool var failOnActiveRuns bool - var computeID string + var clusterId string var autoApprove bool var verbose bool cmd.Flags().BoolVar(&force, "force", false, "Force-override Git branch validation.") cmd.Flags().BoolVar(&forceLock, "force-lock", false, "Force acquisition of deployment lock.") cmd.Flags().BoolVar(&failOnActiveRuns, "fail-on-active-runs", false, "Fail if there are running jobs or pipelines in the deployment.") - cmd.Flags().StringVarP(&computeID, "compute-id", "c", "", "Override compute in the deployment with the given compute ID.") + cmd.Flags().StringVar(&clusterId, "compute-id", "", "Override cluster in the deployment with the given compute ID.") + cmd.Flags().StringVarP(&clusterId, "cluster-id", "c", "", "Override cluster in the deployment with the given cluster ID.") cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "Skip interactive approvals that might be required for deployment.") + cmd.Flags().MarkDeprecated("compute-id", "use --cluster-id instead") cmd.Flags().BoolVar(&verbose, "verbose", false, "Enable verbose output.") // Verbose flag currently only affects file sync output, it's used by the vscode extension cmd.Flags().MarkHidden("verbose") @@ -47,7 +49,10 @@ func newDeployCommand() *cobra.Command { b.AutoApprove = autoApprove if cmd.Flag("compute-id").Changed { - b.Config.Bundle.ComputeID = computeID + b.Config.Bundle.ClusterId = clusterId + } + if cmd.Flag("cluster-id").Changed { + b.Config.Bundle.ClusterId = clusterId } if cmd.Flag("fail-on-active-runs").Changed { b.Config.Bundle.Deployment.FailOnActiveRuns = failOnActiveRuns diff --git a/internal/bundle/bundles/clusters/databricks_template_schema.json 
b/internal/bundle/bundles/clusters/databricks_template_schema.json new file mode 100644 index 000000000..c1c5cf12e --- /dev/null +++ b/internal/bundle/bundles/clusters/databricks_template_schema.json @@ -0,0 +1,16 @@ +{ + "properties": { + "unique_id": { + "type": "string", + "description": "Unique ID for job name" + }, + "spark_version": { + "type": "string", + "description": "Spark version used for job cluster" + }, + "node_type_id": { + "type": "string", + "description": "Node type id for job cluster" + } + } +} diff --git a/internal/bundle/bundles/clusters/template/databricks.yml.tmpl b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl new file mode 100644 index 000000000..e0d6320a3 --- /dev/null +++ b/internal/bundle/bundles/clusters/template/databricks.yml.tmpl @@ -0,0 +1,24 @@ +bundle: + name: basic + +workspace: + root_path: "~/.bundle/{{.unique_id}}" + +resources: + clusters: + test_cluster: + cluster_name: "test-cluster-{{.unique_id}}" + spark_version: "{{.spark_version}}" + node_type_id: "{{.node_type_id}}" + num_workers: 2 + spark_conf: + "spark.executor.memory": "2g" + + jobs: + foo: + name: test-job-with-cluster-{{.unique_id}} + tasks: + - task_key: my_notebook_task + existing_cluster_id: "${resources.clusters.test_cluster.cluster_id}" + spark_python_task: + python_file: ./hello_world.py diff --git a/internal/bundle/bundles/clusters/template/hello_world.py b/internal/bundle/bundles/clusters/template/hello_world.py new file mode 100644 index 000000000..f301245e2 --- /dev/null +++ b/internal/bundle/bundles/clusters/template/hello_world.py @@ -0,0 +1 @@ +print("Hello World!") diff --git a/internal/bundle/clusters_test.go b/internal/bundle/clusters_test.go new file mode 100644 index 000000000..a961f3ea8 --- /dev/null +++ b/internal/bundle/clusters_test.go @@ -0,0 +1,56 @@ +package bundle + +import ( + "fmt" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/internal/acc" + "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestAccDeployBundleWithCluster(t *testing.T) { + ctx, wt := acc.WorkspaceTest(t) + + if testutil.IsAWSCloud(wt.T) { + t.Skip("Skipping test for AWS cloud because it is not permitted to create clusters") + } + + nodeTypeId := internal.GetNodeTypeId(env.Get(ctx, "CLOUD_ENV")) + uniqueId := uuid.New().String() + root, err := initTestTemplate(t, ctx, "clusters", map[string]any{ + "unique_id": uniqueId, + "node_type_id": nodeTypeId, + "spark_version": defaultSparkVersion, + }) + require.NoError(t, err) + + t.Cleanup(func() { + err = destroyBundle(t, ctx, root) + require.NoError(t, err) + + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + if err != nil { + require.ErrorContains(t, err, "does not exist") + } else { + require.Contains(t, []compute.State{compute.StateTerminated, compute.StateTerminating}, cluster.State) + } + + }) + + err = deployBundle(t, ctx, root) + require.NoError(t, err) + + // Cluster should exists after bundle deployment + cluster, err := wt.W.Clusters.GetByClusterName(ctx, fmt.Sprintf("test-cluster-%s", uniqueId)) + require.NoError(t, err) + require.NotNil(t, cluster) + + out, err := runResource(t, ctx, root, "foo") + require.NoError(t, err) + require.Contains(t, out, "Hello World!") +} diff --git a/internal/testutil/cloud.go b/internal/testutil/cloud.go index e547069f3..ba5b75ecf 100644 
--- a/internal/testutil/cloud.go
+++ b/internal/testutil/cloud.go
@@ -49,3 +49,7 @@ func GetCloud(t *testing.T) Cloud {
 }
 return -1
 }
+
+func IsAWSCloud(t *testing.T) bool {
+ return GetCloud(t) == AWS
+}

From 0cc35ca05693e5989308f432f22bb0a28f8cb1dd Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Mon, 23 Sep 2024 18:12:30 +0530
Subject: [PATCH 09/21] Assert tokens are redacted in origin URL when username is not specified (#1785)

TSIA

---
 libs/git/repository_test.go | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/libs/git/repository_test.go b/libs/git/repository_test.go
index a28038eeb..93d9a03dc 100644
--- a/libs/git/repository_test.go
+++ b/libs/git/repository_test.go
@@ -209,7 +209,26 @@ func TestRepositoryGitConfigWhenNotARepo(t *testing.T) {
 }

 func TestRepositoryOriginUrlRemovesUserCreds(t *testing.T) {
- repo := newTestRepository(t)
- repo.addOriginUrl("https://username:token@github.com/databricks/foobar.git")
- repo.assertOriginUrl("https://github.com/databricks/foobar.git")
+ tcases := []struct {
+ url string
+ expected string
+ }{
+ {
+ url: "https://username:token@github.com/databricks/foobar.git",
+ expected: "https://github.com/databricks/foobar.git",
+ },
+ {
+ // Note: The token is still considered and parsed as a username here.
+ // However, credential integrations by Git providers like GitHub
+ // allow setting a PAT token as a username.
+ url: "https://token@github.com/databricks/foobar.git",
+ expected: "https://github.com/databricks/foobar.git",
+ },
+ }
+
+ for _, tc := range tcases {
+ repo := newTestRepository(t)
+ repo.addOriginUrl(tc.url)
+ repo.assertOriginUrl(tc.expected)
+ }
 }

From 490259a14aec0a53fe6bd97f9d1ea5a384e74773 Mon Sep 17 00:00:00 2001
From: Gleb Kanterov
Date: Tue, 24 Sep 2024 15:51:54 +0200
Subject: [PATCH 10/21] Refactor jobs path translation (#1782)

## Changes
Extract a package that other modules can use to transform different kinds of paths in job resources.
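For illustration, here is a minimal sketch of how another module could consume the new visitor. This is a hypothetical caller, not part of this change: it assumes a `config.Root` value named `root`, and uses only the `VisitJobPaths`/`PathKind` API introduced below.

```go
// Hypothetical consumer: collect every notebook path referenced by any job.
var notebooks []string
err := root.Mutate(func(v dyn.Value) (dyn.Value, error) {
	return paths.VisitJobPaths(v, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
		if kind == paths.PathKindNotebook {
			notebooks = append(notebooks, v.MustString())
		}
		// Return the value unchanged; a rewriting mutator would return a modified dyn.Value here.
		return v, nil
	})
})
```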
## Tests Unit tests --- .../config/mutator/paths/job_paths_visitor.go | 115 ++++++++++++ .../mutator/paths/job_paths_visitor_test.go | 168 ++++++++++++++++++ bundle/config/mutator/paths/visitor.go | 26 +++ bundle/config/mutator/translate_paths_jobs.go | 137 ++++---------- 4 files changed, 340 insertions(+), 106 deletions(-) create mode 100644 bundle/config/mutator/paths/job_paths_visitor.go create mode 100644 bundle/config/mutator/paths/job_paths_visitor_test.go create mode 100644 bundle/config/mutator/paths/visitor.go diff --git a/bundle/config/mutator/paths/job_paths_visitor.go b/bundle/config/mutator/paths/job_paths_visitor.go new file mode 100644 index 000000000..275a8fa53 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor.go @@ -0,0 +1,115 @@ +package paths + +import ( + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/libs/dyn" +) + +type jobRewritePattern struct { + pattern dyn.Pattern + kind PathKind + skipRewrite func(string) bool +} + +func noSkipRewrite(string) bool { + return false +} + +func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern { + return []jobRewritePattern{ + { + base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), + PathKindNotebook, + noSkipRewrite, + }, + { + base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), + PathKindDirectory, + noSkipRewrite, + }, + { + base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + } +} + +func jobRewritePatterns() []jobRewritePattern { + // Base pattern to match all tasks in all jobs. + base := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("tasks"), + dyn.AnyIndex(), + ) + + // Compile list of patterns and their respective rewrite functions. + jobEnvironmentsPatterns := []jobRewritePattern{ + { + dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("environments"), + dyn.AnyIndex(), + dyn.Key("spec"), + dyn.Key("dependencies"), + dyn.AnyIndex(), + ), + PathKindWithPrefix, + func(s string) bool { + return !libraries.IsLibraryLocal(s) + }, + }, + } + + taskPatterns := jobTaskRewritePatterns(base) + forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) + allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) + allPatterns = append(allPatterns, forEachPatterns...) + return allPatterns +} + +// VisitJobPaths visits all paths in job resources and applies a function to each path. 
+func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) { + var err error + var newValue = value + + for _, rewritePattern := range jobRewritePatterns() { + newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + if rewritePattern.skipRewrite(v.MustString()) { + return v, nil + } + + return fn(p, rewritePattern.kind, v) + }) + + if err != nil { + return dyn.InvalidValue, err + } + } + + return newValue, nil +} diff --git a/bundle/config/mutator/paths/job_paths_visitor_test.go b/bundle/config/mutator/paths/job_paths_visitor_test.go new file mode 100644 index 000000000..7f0201579 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor_test.go @@ -0,0 +1,168 @@ +package paths + +import ( + "testing" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func TestVisitJobPaths(t *testing.T) { + task0 := jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + } + task1 := jobs.Task{ + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "abc", + }, + } + task2 := jobs.Task{ + DbtTask: &jobs.DbtTask{ + ProjectDirectory: "abc", + }, + } + task3 := jobs.Task{ + SqlTask: &jobs.SqlTask{ + File: &jobs.SqlTaskFile{ + Path: "abc", + }, + }, + } + task4 := jobs.Task{ + Libraries: []compute.Library{ + {Whl: "dist/foo.whl"}, + }, + } + task5 := jobs.Task{ + Libraries: []compute.Library{ + {Jar: "dist/foo.jar"}, + }, + } + task6 := jobs.Task{ + Libraries: []compute.Library{ + {Requirements: "requirements.txt"}, + }, + } + + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + task0, + task1, + task2, + task3, + task4, + task5, + task6, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"), + dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"), + dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"), + dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"), + dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func TestVisitJobPaths_environments(t *testing.T) { + environment0 := jobs.JobEnvironment{ + Spec: &compute.Environment{ + Dependencies: []string{ + "dist_0/*.whl", + "dist_1/*.whl", + }, + }, + } + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Environments: []jobs.JobEnvironment{ + environment0, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"), + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func 
TestVisitJobPaths_foreach(t *testing.T) { + task0 := jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + }, + }, + } + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + task0, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func visitJobPaths(t *testing.T, root config.Root) []dyn.Path { + var actual []dyn.Path + err := root.Mutate(func(value dyn.Value) (dyn.Value, error) { + return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) { + actual = append(actual, p) + return v, nil + }) + }) + require.NoError(t, err) + return actual +} diff --git a/bundle/config/mutator/paths/visitor.go b/bundle/config/mutator/paths/visitor.go new file mode 100644 index 000000000..40d1f14ef --- /dev/null +++ b/bundle/config/mutator/paths/visitor.go @@ -0,0 +1,26 @@ +package paths + +import "github.com/databricks/cli/libs/dyn" + +type PathKind int + +const ( + // PathKindLibrary is a path to a library file + PathKindLibrary = iota + + // PathKindNotebook is a path to a notebook file + PathKindNotebook + + // PathKindWorkspaceFile is a path to a regular workspace file, + // notebooks are not allowed because they are uploaded a special + // kind of workspace object. + PathKindWorkspaceFile + + // PathKindWithPrefix is a path that starts with './' + PathKindWithPrefix + + // PathKindDirectory is a path to directory + PathKindDirectory +) + +type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error) diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index e34eeb2f0..c29ff0ea9 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -4,97 +4,11 @@ import ( "fmt" "slices" - "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/cli/libs/dyn" ) -type jobRewritePattern struct { - pattern dyn.Pattern - fn rewriteFunc - skipRewrite func(string) bool -} - -func noSkipRewrite(string) bool { - return false -} - -func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern { - return []jobRewritePattern{ - { - base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), - t.translateNotebookPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), - t.translateDirectoryPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), - t.translateFilePath, - noSkipRewrite, - }, - } -} - -func (t *translateContext) jobRewritePatterns() []jobRewritePattern { - // Base pattern to match all tasks in all jobs. 
- base := dyn.NewPattern(
- dyn.Key("resources"),
- dyn.Key("jobs"),
- dyn.AnyKey(),
- dyn.Key("tasks"),
- dyn.AnyIndex(),
- )
-
- // Compile list of patterns and their respective rewrite functions.
- jobEnvironmentsPatterns := []jobRewritePattern{
- {
- dyn.NewPattern(
- dyn.Key("resources"),
- dyn.Key("jobs"),
- dyn.AnyKey(),
- dyn.Key("environments"),
- dyn.AnyIndex(),
- dyn.Key("spec"),
- dyn.Key("dependencies"),
- dyn.AnyIndex(),
- ),
- t.translateNoOpWithPrefix,
- func(s string) bool {
- return !libraries.IsLibraryLocal(s)
- },
- },
- }
-
- taskPatterns := rewritePatterns(t, base)
- forEachPatterns := rewritePatterns(t, base.Append(dyn.Key("for_each_task"), dyn.Key("task")))
- allPatterns := append(taskPatterns, jobEnvironmentsPatterns...)
- allPatterns = append(allPatterns, forEachPatterns...)
- return allPatterns
-}
-
 func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) {
 var err error

@@ -111,30 +25,41 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error)
 }
 }

- for _, rewritePattern := range t.jobRewritePatterns() {
- v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
- key := p[2].Key()
+ return paths.VisitJobPaths(v, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) {
+ key := p[2].Key()

- // Skip path translation if the job is using git source.
- if slices.Contains(ignore, key) {
- return v, nil
- }
+ // Skip path translation if the job is using git source.
+ if slices.Contains(ignore, key) {
+ return v, nil
+ }

- dir, err := v.Location().Directory()
- if err != nil {
- return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err)
- }
+ dir, err := v.Location().Directory()
+ if err != nil {
+ return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err)
+ }

- sv := v.MustString()
- if rewritePattern.skipRewrite(sv) {
- return v, nil
- }
- return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key])
- })
+ rewritePatternFn, err := t.getRewritePatternFn(kind)
 if err != nil {
 return dyn.InvalidValue, err
 }
+
+ return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key])
+ })
+}
+
+func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) {
+ switch kind {
+ case paths.PathKindLibrary:
+ return t.translateNoOp, nil
+ case paths.PathKindNotebook:
+ return t.translateNotebookPath, nil
+ case paths.PathKindWorkspaceFile:
+ return t.translateFilePath, nil
+ case paths.PathKindDirectory:
+ return t.translateDirectoryPath, nil
+ case paths.PathKindWithPrefix:
+ return t.translateNoOpWithPrefix, nil
 }
- return v, nil
+ return nil, fmt.Errorf("unsupported path kind: %d", kind)
 }

From 3d9decdda9638fb5495212611307d70257b3d3e6 Mon Sep 17 00:00:00 2001
From: Gleb Kanterov
Date: Wed, 25 Sep 2024 13:30:14 +0200
Subject: [PATCH 11/21] Add JobTaskClusterSpec validate mutator (#1784)

## Changes
Add JobTaskClusterSpec validate mutator. It catches the case when tasks don't specify which cluster to use.

For example, we can get this error with minor modifications to `default-python` template:

```yaml
tasks:
  - task_key: python_file_task
    spark_python_task:
      python_file: ../src/my_project_10/main.py
```

```
% databricks bundle validate
Error: Missing required cluster or environment settings
  at resources.jobs.my_project_10_job.tasks[0]
  in resources/my_project_10_job.yml:17:11

Task "print_github_stars" requires a cluster or an environment to run.
Specify one of the following fields: job_cluster_key, environment_key, existing_cluster_id, new_cluster. ``` We implicitly rely on "one of" validation, which does not exist. Many bundle fields can't co-exist, for instance, specifying: `JobTask.{existing_cluster_id,job_cluster_key}`, `Library.{whl,pypi}`, `JobTask.{notebook_task,python_wheel_task}`, etc. ## Tests Unit tests --------- Co-authored-by: Pieter Noordhuis --- .../config/validate/job_task_cluster_spec.go | 161 ++++++++++++++ .../validate/job_task_cluster_spec_test.go | 203 ++++++++++++++++++ bundle/config/validate/validate.go | 1 + 3 files changed, 365 insertions(+) create mode 100644 bundle/config/validate/job_task_cluster_spec.go create mode 100644 bundle/config/validate/job_task_cluster_spec_test.go diff --git a/bundle/config/validate/job_task_cluster_spec.go b/bundle/config/validate/job_task_cluster_spec.go new file mode 100644 index 000000000..b80befcdf --- /dev/null +++ b/bundle/config/validate/job_task_cluster_spec.go @@ -0,0 +1,161 @@ +package validate + +import ( + "context" + "fmt" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +// JobTaskClusterSpec validates that job tasks have cluster spec defined +// if task requires a cluster +func JobTaskClusterSpec() bundle.ReadOnlyMutator { + return &jobTaskClusterSpec{} +} + +type jobTaskClusterSpec struct { +} + +func (v *jobTaskClusterSpec) Name() string { + return "validate:job_task_cluster_spec" +} + +func (v *jobTaskClusterSpec) Apply(ctx context.Context, rb bundle.ReadOnlyBundle) diag.Diagnostics { + diags := diag.Diagnostics{} + + jobsPath := dyn.NewPath(dyn.Key("resources"), dyn.Key("jobs")) + + for resourceName, job := range rb.Config().Resources.Jobs { + resourcePath := jobsPath.Append(dyn.Key(resourceName)) + + for taskIndex, task := range job.Tasks { + taskPath := resourcePath.Append(dyn.Key("tasks"), dyn.Index(taskIndex)) + + diags = diags.Extend(validateJobTask(rb, task, taskPath)) + } + } + + return diags +} + +func validateJobTask(rb bundle.ReadOnlyBundle, task jobs.Task, taskPath dyn.Path) diag.Diagnostics { + diags := diag.Diagnostics{} + + var specified []string + var unspecified []string + + if task.JobClusterKey != "" { + specified = append(specified, "job_cluster_key") + } else { + unspecified = append(unspecified, "job_cluster_key") + } + + if task.EnvironmentKey != "" { + specified = append(specified, "environment_key") + } else { + unspecified = append(unspecified, "environment_key") + } + + if task.ExistingClusterId != "" { + specified = append(specified, "existing_cluster_id") + } else { + unspecified = append(unspecified, "existing_cluster_id") + } + + if task.NewCluster != nil { + specified = append(specified, "new_cluster") + } else { + unspecified = append(unspecified, "new_cluster") + } + + if task.ForEachTask != nil { + forEachTaskPath := taskPath.Append(dyn.Key("for_each_task"), dyn.Key("task")) + + diags = diags.Extend(validateJobTask(rb, task.ForEachTask.Task, forEachTaskPath)) + } + + if isComputeTask(task) && len(specified) == 0 { + if task.NotebookTask != nil { + // notebook tasks without cluster spec will use notebook environment + } else { + // path might be not very helpful, adding user-specified task key clarifies the context + detail := fmt.Sprintf( + "Task %q requires a cluster or an environment to run.\nSpecify one of the following fields: %s.", + task.TaskKey, + strings.Join(unspecified, ", 
"), + ) + + diags = diags.Append(diag.Diagnostic{ + Severity: diag.Error, + Summary: "Missing required cluster or environment settings", + Detail: detail, + Locations: rb.Config().GetLocations(taskPath.String()), + Paths: []dyn.Path{taskPath}, + }) + } + } + + return diags +} + +// isComputeTask returns true if the task runs on a cluster or serverless GC +func isComputeTask(task jobs.Task) bool { + if task.NotebookTask != nil { + // if warehouse_id is set, it's SQL notebook that doesn't need cluster or serverless GC + if task.NotebookTask.WarehouseId != "" { + return false + } else { + // task settings don't require specifying a cluster/serverless GC, but task itself can run on one + // we handle that case separately in validateJobTask + return true + } + } + + if task.PythonWheelTask != nil { + return true + } + + if task.DbtTask != nil { + return true + } + + if task.SparkJarTask != nil { + return true + } + + if task.SparkSubmitTask != nil { + return true + } + + if task.SparkPythonTask != nil { + return true + } + + if task.SqlTask != nil { + return false + } + + if task.PipelineTask != nil { + // while pipelines use clusters, pipeline tasks don't, they only trigger pipelines + return false + } + + if task.RunJobTask != nil { + return false + } + + if task.ConditionTask != nil { + return false + } + + // for each task doesn't use clusters, underlying task(s) can though + if task.ForEachTask != nil { + return false + } + + return false +} diff --git a/bundle/config/validate/job_task_cluster_spec_test.go b/bundle/config/validate/job_task_cluster_spec_test.go new file mode 100644 index 000000000..a3a7ccf25 --- /dev/null +++ b/bundle/config/validate/job_task_cluster_spec_test.go @@ -0,0 +1,203 @@ +package validate + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/assert" +) + +func TestJobTaskClusterSpec(t *testing.T) { + expectedSummary := "Missing required cluster or environment settings" + + type testCase struct { + name string + task jobs.Task + errorPath string + errorDetail string + errorSummary string + } + + testCases := []testCase{ + { + name: "valid notebook task", + task: jobs.Task{ + // while a cluster is needed, it will use notebook environment to create one + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (job_cluster_key)", + task: jobs.Task{ + JobClusterKey: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (new_cluster)", + task: jobs.Task{ + NewCluster: &compute.ClusterSpec{}, + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid notebook task (existing_cluster_id)", + task: jobs.Task{ + ExistingClusterId: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + { + name: "valid SQL notebook task", + task: jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + WarehouseId: "warehouse1", + }, + }, + }, + { + name: "valid python wheel task", + task: jobs.Task{ + JobClusterKey: "cluster1", + PythonWheelTask: &jobs.PythonWheelTask{}, + }, + }, + { + name: "valid python wheel task (environment_key)", + task: jobs.Task{ + EnvironmentKey: "environment1", + PythonWheelTask: &jobs.PythonWheelTask{}, + }, + }, + { + name: "valid dbt task", + task: jobs.Task{ + JobClusterKey: "cluster1", + DbtTask: &jobs.DbtTask{}, + }, + }, + { + 
name: "valid spark jar task", + task: jobs.Task{ + JobClusterKey: "cluster1", + SparkJarTask: &jobs.SparkJarTask{}, + }, + }, + { + name: "valid spark submit", + task: jobs.Task{ + NewCluster: &compute.ClusterSpec{}, + SparkSubmitTask: &jobs.SparkSubmitTask{}, + }, + }, + { + name: "valid spark python task", + task: jobs.Task{ + JobClusterKey: "cluster1", + SparkPythonTask: &jobs.SparkPythonTask{}, + }, + }, + { + name: "valid SQL task", + task: jobs.Task{ + SqlTask: &jobs.SqlTask{}, + }, + }, + { + name: "valid pipeline task", + task: jobs.Task{ + PipelineTask: &jobs.PipelineTask{}, + }, + }, + { + name: "valid run job task", + task: jobs.Task{ + RunJobTask: &jobs.RunJobTask{}, + }, + }, + { + name: "valid condition task", + task: jobs.Task{ + ConditionTask: &jobs.ConditionTask{}, + }, + }, + { + name: "valid for each task", + task: jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + JobClusterKey: "cluster1", + NotebookTask: &jobs.NotebookTask{}, + }, + }, + }, + }, + { + name: "invalid python wheel task", + task: jobs.Task{ + PythonWheelTask: &jobs.PythonWheelTask{}, + TaskKey: "my_task", + }, + errorPath: "resources.jobs.job1.tasks[0]", + errorDetail: `Task "my_task" requires a cluster or an environment to run. +Specify one of the following fields: job_cluster_key, environment_key, existing_cluster_id, new_cluster.`, + errorSummary: expectedSummary, + }, + { + name: "invalid for each task", + task: jobs.Task{ + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + PythonWheelTask: &jobs.PythonWheelTask{}, + TaskKey: "my_task", + }, + }, + }, + errorPath: "resources.jobs.job1.tasks[0].for_each_task.task", + errorDetail: `Task "my_task" requires a cluster or an environment to run. +Specify one of the following fields: job_cluster_key, environment_key, existing_cluster_id, new_cluster.`, + errorSummary: expectedSummary, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + job := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{tc.task}, + }, + } + + b := createBundle(map[string]*resources.Job{"job1": job}) + diags := bundle.ApplyReadOnly(context.Background(), bundle.ReadOnly(b), JobTaskClusterSpec()) + + if tc.errorPath != "" || tc.errorDetail != "" || tc.errorSummary != "" { + assert.Len(t, diags, 1) + assert.Len(t, diags[0].Paths, 1) + + diag := diags[0] + + assert.Equal(t, tc.errorPath, diag.Paths[0].String()) + assert.Equal(t, tc.errorSummary, diag.Summary) + assert.Equal(t, tc.errorDetail, diag.Detail) + } else { + assert.ElementsMatch(t, []string{}, diags) + } + }) + } +} + +func createBundle(jobs map[string]*resources.Job) *bundle.Bundle { + return &bundle.Bundle{ + Config: config.Root{ + Resources: config.Resources{ + Jobs: jobs, + }, + }, + } +} diff --git a/bundle/config/validate/validate.go b/bundle/config/validate/validate.go index b4da0bc05..79f42bd23 100644 --- a/bundle/config/validate/validate.go +++ b/bundle/config/validate/validate.go @@ -34,6 +34,7 @@ func (v *validate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics JobClusterKeyDefined(), FilesToSync(), ValidateSyncPatterns(), + JobTaskClusterSpec(), )) } From b3a3071086899dabbdf36f063d1cf892993090ff Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 25 Sep 2024 14:35:16 +0200 Subject: [PATCH 12/21] Fixed full variable override detection (#1787) ## Changes Fixes #1786 ## Tests All valid override combinations are added as test cases --- bundle/config/root.go | 37 +++++++++++++---- bundle/config/root_test.go | 85 
++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index 92d834f0a..ff169e4ce 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -418,22 +418,43 @@ func isFullVariableOverrideDef(v dyn.Value) bool { return false } - // If the map has more than 2 keys, it is not a full variable override. - if mv.Len() > 2 { + // If the map has more than 3 keys, it is not a full variable override. + if mv.Len() > 3 { return false } - // If the map has 2 keys, one of them should be "default" and the other is "type" + // If the map has 3 keys, they should be "description", "type" and "default" or "lookup" + if mv.Len() == 3 { + if _, ok := mv.GetByString("type"); ok { + if _, ok := mv.GetByString("description"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } + } + } + + return false + } + + // If the map has 2 keys, one of them should be "default" or "lookup" and the other is "type" or "description" if mv.Len() == 2 { - if _, ok := mv.GetByString("type"); !ok { - return false + if _, ok := mv.GetByString("type"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } } - if _, ok := mv.GetByString("default"); !ok { - return false + if _, ok := mv.GetByString("description"); ok { + if _, ok := mv.GetByString("default"); ok { + return true + } + + if _, ok := mv.GetByString("lookup"); ok { + return true + } } - return true + return false } for _, keyword := range variableKeywords { diff --git a/bundle/config/root_test.go b/bundle/config/root_test.go index d2c7a9b1f..9e6123534 100644 --- a/bundle/config/root_test.go +++ b/bundle/config/root_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/databricks/cli/bundle/config/variable" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -169,3 +170,87 @@ func TestRootMergeTargetOverridesWithVariables(t *testing.T) { assert.Equal(t, "complex var", root.Variables["complex"].Description) } + +func TestIsFullVariableOverrideDef(t *testing.T) { + testCases := []struct { + value dyn.Value + expected bool + }{ + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + "description": dyn.V("foo var"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "lookup": dyn.V("foo"), + "description": dyn.V("foo var"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "lookup": dyn.V("foo"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "description": dyn.V("string"), + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "description": dyn.V("string"), + "lookup": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "default": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "lookup": dyn.V("foo"), + }), + expected: true, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + }), + expected: false, + }, + { + value: dyn.V(map[string]dyn.Value{ + "type": dyn.V("string"), + "default": dyn.V("foo"), + "description": dyn.V("foo var"), + "lookup": dyn.V("foo"), + }), + expected: false, + }, + } + + for i, tc := range testCases { + assert.Equal(t, tc.expected, 
isFullVariableOverrideDef(tc.value), "test case %d", i)
+ }
+
+}

From a4ba0bbe9f332dbc497d6cc3be0e19436e2e9375 Mon Sep 17 00:00:00 2001
From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
Date: Wed, 25 Sep 2024 18:28:14 +0530
Subject: [PATCH 13/21] Add sub-extension to resource files in built-in templates (#1777)

## Changes
We want to encourage a pattern of only specifying a single resource in a YAML file when an `.<resource-type>.yml` extension (like `.job.yml`) is used. This convention could allow us to bijectively map a resource YAML file to its corresponding resource in the Databricks workspace.

This PR simply makes the built-in templates compliant with this format.

## Tests
Existing tests.

---
 .../dbt-sql/template/{{.project_name}}/README.md.tmpl | 2 +-
 ...ect_name}}_job.yml.tmpl => {{.project_name}}.job.yml.tmpl} | 0
 .../templates/default-python/template/__preamble.tmpl | 4 ++--
 .../default-python/template/{{.project_name}}/README.md.tmpl | 2 +-
 ...ect_name}}_job.yml.tmpl => {{.project_name}}.job.yml.tmpl} | 2 +-
 ..._pipeline.yml.tmpl => {{.project_name}}.pipeline.yml.tmpl} | 0
 .../template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl | 2 +-
 .../template/{{.project_name}}/src/notebook.ipynb.tmpl | 2 +-
 ...}}_sql_job.yml.tmpl => {{.project_name}}_sql.job.yml.tmpl} | 0
 .../template/{{.project_name}}/src/orders_daily.sql.tmpl | 2 +-
 .../template/{{.project_name}}/src/orders_raw.sql.tmpl | 2 +-
 11 files changed, 9 insertions(+), 9 deletions(-)
 rename libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{{.project_name}}_job.yml.tmpl => {{.project_name}}.job.yml.tmpl} (100%)
 rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name}}_job.yml.tmpl => {{.project_name}}.job.yml.tmpl} (97%)
 rename libs/template/templates/default-python/template/{{.project_name}}/resources/{{{.project_name}}_pipeline.yml.tmpl => {{.project_name}}.pipeline.yml.tmpl} (100%)
 rename libs/template/templates/default-sql/template/{{.project_name}}/resources/{{{.project_name}}_sql_job.yml.tmpl => {{.project_name}}_sql.job.yml.tmpl} (100%)

diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
index dbf8a8d85..cd4c29a76 100644
--- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
+++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
@@ -121,7 +121,7 @@ You can find that job by opening your workspace and clicking on **Workflows**.
 You can also deploy to your production target directly from the command-line.
 The warehouse, catalog, and schema for that target are configured in databricks.yml.
-When deploying to this target, note that the default job at resources/{{.project_name}}_job.yml
+When deploying to this target, note that the default job at resources/{{.project_name}}.job.yml
 has a schedule set that runs every day. The schedule
 is paused when deploying in development mode (see
 https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl similarity index 100% rename from libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl rename to libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl index a919a269c..69b769cde 100644 --- a/libs/template/templates/default-python/template/__preamble.tmpl +++ b/libs/template/templates/default-python/template/__preamble.tmpl @@ -18,7 +18,7 @@ This file only template directives; it is skipped for the actual output. {{if $notDLT}} {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.yml"}} + {{skip "{{.project_name}}/resources/{{.project_name}}.pipeline.yml"}} {{end}} {{if $notNotebook}} @@ -26,7 +26,7 @@ This file only template directives; it is skipped for the actual output. {{end}} {{if (and $notDLT $notNotebook $notPython)}} - {{skip "{{.project_name}}/resources/{{.project_name}}_job.yml"}} + {{skip "{{.project_name}}/resources/{{.project_name}}.job.yml"}} {{else}} {{skip "{{.project_name}}/resources/.gitkeep"}} {{end}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index 5adade0b3..53847a9c9 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -29,7 +29,7 @@ The '{{.project_name}}' project was generated by using the default-python templa ``` Note that the default job from the template has a schedule that runs every day - (defined in resources/{{.project_name}}_job.yml). The schedule + (defined in resources/{{.project_name}}.job.yml). The schedule is paused when deploying in development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html). 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl similarity index 97% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl index d2100e908..5211e3894 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl @@ -40,7 +40,7 @@ resources: - task_key: notebook_task {{- end}} pipeline_task: - {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}_pipeline.yml */}} + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}} pipeline_id: ${resources.pipelines.{{.project_name}}_pipeline.id} {{end -}} {{- if (eq .include_python "yes") }} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl rename to libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl index b152e9a30..253ed321c 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# DLT pipeline\n", "\n", - "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}_pipeline.yml." + "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml." ] }, { diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index a228f8d18..6782a053b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -14,7 +14,7 @@ "source": [ "# Default notebook\n", "\n", - "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml." + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml." 
] }, { diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql.job.yml.tmpl similarity index 100% rename from libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl rename to libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql.job.yml.tmpl diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index e5ceb77a9..444ae4e03 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) +-- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) USE CATALOG {{"{{"}}catalog{{"}}"}}; USE IDENTIFIER({{"{{"}}schema{{"}}"}}); diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl index c73606ef1..80f6773cb 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl @@ -1,4 +1,4 @@ --- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) +-- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql.job.yml) -- -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html From 7f1121d8d85900db0fc333ee901dfe6eb8488b3b Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 25 Sep 2024 17:45:28 +0200 Subject: [PATCH 14/21] Pin Go toolchain to 1.22.7 (#1790) ## Changes Relates to https://github.com/databricks/cli/pull/1758. More information about toolchains: * https://go.dev/blog/toolchain * https://go.dev/doc/toolchain We need to specify the toolchain as we need to bump Go to 1.22.0 for the `mod` upgrade and want to use the latest toolchain on the 1.22 series. ## Tests The previous release was made with Go 1.22.7 so we should continue to use it. --- .github/workflows/push.yml | 6 +++--- .github/workflows/release-snapshot.yml | 2 +- .github/workflows/release.yml | 2 +- go.mod | 4 +++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 02bf73784..ee60da9da 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -33,7 +33,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 - name: Setup Python uses: actions/setup-python@v5 @@ -68,7 +68,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # No need to download cached dependencies when running gofmt. 
cache: false @@ -100,7 +100,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # Github repo: https://github.com/ajv-validator/ajv-cli - name: Install ajv-cli diff --git a/.github/workflows/release-snapshot.yml b/.github/workflows/release-snapshot.yml index defd1c535..6a601a5f9 100644 --- a/.github/workflows/release-snapshot.yml +++ b/.github/workflows/release-snapshot.yml @@ -21,7 +21,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # The default cache key for this action considers only the `go.sum` file. # We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 531fb39bf..f9742a19d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,7 +22,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: 1.22.x + go-version: 1.22.7 # The default cache key for this action considers only the `go.sum` file. # We include .goreleaser.yaml here to differentiate from the cache used by the push action diff --git a/go.mod b/go.mod index ba41ef3ac..e1c7519fd 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/databricks/cli -go 1.22 +go 1.22.0 + +toolchain go1.22.7 require ( github.com/Masterminds/semver/v3 v3.3.0 // MIT From 495040e4cd2d8fbbbcc09aff6cf3b88cb4daee78 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 25 Sep 2024 21:43:48 +0530 Subject: [PATCH 15/21] Modify SetLocation test utility to take full locations as argument (#1788) I plan to use this in https://github.com/databricks/cli/pull/1780, to set the line and column numbers as well for the locations. 
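For illustration, a minimal standalone sketch of such a full location value, assuming `dyn.Location` in `libs/dyn` carries `Line` and `Column` fields alongside `File`:

```
package main

import (
	"fmt"

	"github.com/databricks/cli/libs/dyn"
)

func main() {
	// A full location: the file a value was loaded from plus a position.
	// The new SetLocation signature takes a slice, so several locations
	// can be associated with a single configuration value.
	locs := []dyn.Location{{File: "databricks.yml", Line: 10, Column: 4}}
	fmt.Println(locs[0]) // prints something like "databricks.yml:10:4"
}
```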
gopatch file used: ``` @@ var x expression var y expression var z expression @@ -bundletest.SetLocation(x, y, z) +bundletest.SetLocation(x, y, []dyn.Location{{File: z}}) ``` --- bundle/artifacts/expand_globs_test.go | 7 ++-- .../expand_pipeline_glob_paths_test.go | 5 +-- .../config/mutator/rewrite_sync_paths_test.go | 25 ++++++++------- bundle/config/mutator/sync_infer_root_test.go | 3 +- bundle/config/mutator/translate_paths_test.go | 32 +++++++++---------- bundle/deploy/metadata/compute_test.go | 7 ++-- bundle/internal/bundletest/location.go | 6 ++-- .../libraries/expand_glob_references_test.go | 7 ++-- 8 files changed, 48 insertions(+), 44 deletions(-) diff --git a/bundle/artifacts/expand_globs_test.go b/bundle/artifacts/expand_globs_test.go index c9c478448..1665a4806 100644 --- a/bundle/artifacts/expand_globs_test.go +++ b/bundle/artifacts/expand_globs_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -36,7 +37,7 @@ func TestExpandGlobs_Nominal(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( @@ -77,7 +78,7 @@ func TestExpandGlobs_InvalidPattern(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( @@ -125,7 +126,7 @@ func TestExpandGlobs_NoMatches(t *testing.T) { }, } - bundletest.SetLocation(b, "artifacts", filepath.Join(tmpDir, "databricks.yml")) + bundletest.SetLocation(b, "artifacts", []dyn.Location{{File: filepath.Join(tmpDir, "databricks.yml")}}) ctx := context.Background() diags := bundle.Apply(ctx, b, bundle.Seq( diff --git a/bundle/config/mutator/expand_pipeline_glob_paths_test.go b/bundle/config/mutator/expand_pipeline_glob_paths_test.go index d1671c256..07dd20215 100644 --- a/bundle/config/mutator/expand_pipeline_glob_paths_test.go +++ b/bundle/config/mutator/expand_pipeline_glob_paths_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/pipelines" "github.com/stretchr/testify/require" @@ -105,8 +106,8 @@ func TestExpandGlobPathsInPipelines(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) - bundletest.SetLocation(b, "resources.pipelines.pipeline.libraries[3]", filepath.Join(dir, "relative", "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) + bundletest.SetLocation(b, "resources.pipelines.pipeline.libraries[3]", []dyn.Location{{File: filepath.Join(dir, "relative", "resource.yml")}}) m := ExpandPipelineGlobPaths() diags := bundle.Apply(context.Background(), b, m) diff --git a/bundle/config/mutator/rewrite_sync_paths_test.go b/bundle/config/mutator/rewrite_sync_paths_test.go index fa7f124b7..a66f2763a 100644 --- 
a/bundle/config/mutator/rewrite_sync_paths_test.go +++ b/bundle/config/mutator/rewrite_sync_paths_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" ) @@ -33,12 +34,12 @@ func TestRewriteSyncPathsRelative(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml") - bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml") - bundletest.SetLocation(b, "sync.include[0]", "./file.yml") - bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml") - bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml") - bundletest.SetLocation(b, "sync.exclude[1]", "./a/b/c/file.yml") + bundletest.SetLocation(b, "sync.paths[0]", []dyn.Location{{File: "./databricks.yml"}}) + bundletest.SetLocation(b, "sync.paths[1]", []dyn.Location{{File: "./databricks.yml"}}) + bundletest.SetLocation(b, "sync.include[0]", []dyn.Location{{File: "./file.yml"}}) + bundletest.SetLocation(b, "sync.include[1]", []dyn.Location{{File: "./a/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[0]", []dyn.Location{{File: "./a/b/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[1]", []dyn.Location{{File: "./a/b/c/file.yml"}}) diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) @@ -72,12 +73,12 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml") - bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml") - bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml") - bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml") - bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml") - bundletest.SetLocation(b, "sync.exclude[1]", "/tmp/dir/a/b/c/file.yml") + bundletest.SetLocation(b, "sync.paths[0]", []dyn.Location{{File: "/tmp/dir/databricks.yml"}}) + bundletest.SetLocation(b, "sync.paths[1]", []dyn.Location{{File: "/tmp/dir/databricks.yml"}}) + bundletest.SetLocation(b, "sync.include[0]", []dyn.Location{{File: "/tmp/dir/file.yml"}}) + bundletest.SetLocation(b, "sync.include[1]", []dyn.Location{{File: "/tmp/dir/a/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[0]", []dyn.Location{{File: "/tmp/dir/a/b/file.yml"}}) + bundletest.SetLocation(b, "sync.exclude[1]", []dyn.Location{{File: "/tmp/dir/a/b/c/file.yml"}}) diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go index 383e56769..85e40adc6 100644 --- a/bundle/config/mutator/sync_infer_root_test.go +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -184,7 +185,7 @@ func TestSyncInferRoot_Error(t *testing.T) { }, } - bundletest.SetLocation(b, "sync.paths", "databricks.yml") + bundletest.SetLocation(b, "sync.paths", []dyn.Location{{File: "databricks.yml"}}) ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) diff --git a/bundle/config/mutator/translate_paths_test.go 
b/bundle/config/mutator/translate_paths_test.go index 50fcd3b07..c03cee73e 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -82,7 +82,7 @@ func TestTranslatePathsSkippedWithGitSource(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -210,7 +210,7 @@ func TestTranslatePaths(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -346,8 +346,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) - bundletest.SetLocation(b, "resources.pipelines", filepath.Join(dir, "pipeline/resource.yml")) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) + bundletest.SetLocation(b, "resources.pipelines", []dyn.Location{{File: filepath.Join(dir, "pipeline/resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -408,7 +408,7 @@ func TestTranslatePathsOutsideSyncRoot(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "../resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "../resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), "is not contained in sync root path") @@ -439,7 +439,7 @@ func TestJobNotebookDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found") @@ -470,7 +470,7 @@ func TestJobFileDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found") @@ -501,7 +501,7 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "notebook ./doesnt_exist.py not found") @@ -532,7 +532,7 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "fake.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "fake.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.EqualError(t, diags.Error(), "file ./doesnt_exist.py not found") @@ -567,7 +567,7 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + 
bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a file for "resources.jobs.job.tasks[0].spark_python_task.python_file" but got a notebook`) @@ -602,7 +602,7 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.jobs.job.tasks[0].notebook_task.notebook_path" but got a file`) @@ -637,7 +637,7 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a notebook for "resources.pipelines.pipeline.libraries[0].notebook.path" but got a file`) @@ -672,7 +672,7 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) assert.ErrorContains(t, diags.Error(), `expected a file for "resources.pipelines.pipeline.libraries[0].file.path" but got a notebook`) @@ -710,7 +710,7 @@ func TestTranslatePathJobEnvironments(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) require.NoError(t, diags.Error()) @@ -753,8 +753,8 @@ func TestTranslatePathWithComplexVariables(t *testing.T) { }, } - bundletest.SetLocation(b, "variables", filepath.Join(dir, "variables/variables.yml")) - bundletest.SetLocation(b, "resources.jobs", filepath.Join(dir, "job/resource.yml")) + bundletest.SetLocation(b, "variables", []dyn.Location{{File: filepath.Join(dir, "variables/variables.yml")}}) + bundletest.SetLocation(b, "resources.jobs", []dyn.Location{{File: filepath.Join(dir, "job/resource.yml")}}) ctx := context.Background() // Assign the variables to the dynamic configuration. 
diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index 6d43f845b..2c2c72376 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -55,9 +56,9 @@ func TestComputeMetadataMutator(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs.my-job-1", "a/b/c") - bundletest.SetLocation(b, "resources.jobs.my-job-2", "d/e/f") - bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "abc") + bundletest.SetLocation(b, "resources.jobs.my-job-1", []dyn.Location{{File: "a/b/c"}}) + bundletest.SetLocation(b, "resources.jobs.my-job-2", []dyn.Location{{File: "d/e/f"}}) + bundletest.SetLocation(b, "resources.pipelines.my-pipeline", []dyn.Location{{File: "abc"}}) expectedMetadata := metadata.Metadata{ Version: metadata.Version, diff --git a/bundle/internal/bundletest/location.go b/bundle/internal/bundletest/location.go index 380d6e17d..2ffd621bf 100644 --- a/bundle/internal/bundletest/location.go +++ b/bundle/internal/bundletest/location.go @@ -8,15 +8,13 @@ import ( // SetLocation sets the location of all values in the bundle to the given path. // This is useful for testing where we need to associate configuration // with the path it is loaded from. -func SetLocation(b *bundle.Bundle, prefix string, filePath string) { +func SetLocation(b *bundle.Bundle, prefix string, locations []dyn.Location) { start := dyn.MustPathFromString(prefix) b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { return dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { // If the path has the given prefix, set the location. if p.HasPrefix(start) { - return v.WithLocations([]dyn.Location{{ - File: filePath, - }}), nil + return v.WithLocations(locations), nil } // The path is not nested under the given prefix. 
diff --git a/bundle/libraries/expand_glob_references_test.go b/bundle/libraries/expand_glob_references_test.go index e7f2e1693..2dfbddb74 100644 --- a/bundle/libraries/expand_glob_references_test.go +++ b/bundle/libraries/expand_glob_references_test.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/internal/bundletest" "github.com/databricks/cli/internal/testutil" + "github.com/databricks/cli/libs/dyn" "github.com/databricks/databricks-sdk-go/service/compute" "github.com/databricks/databricks-sdk-go/service/jobs" "github.com/stretchr/testify/require" @@ -61,7 +62,7 @@ func TestGlobReferencesExpandedForTaskLibraries(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) @@ -146,7 +147,7 @@ func TestGlobReferencesExpandedForForeachTaskLibraries(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) @@ -221,7 +222,7 @@ func TestGlobReferencesExpandedForEnvironmentsDeps(t *testing.T) { }, } - bundletest.SetLocation(b, ".", filepath.Join(dir, "resource.yml")) + bundletest.SetLocation(b, ".", []dyn.Location{{File: filepath.Join(dir, "resource.yml")}}) diags := bundle.Apply(context.Background(), b, ExpandGlobReferences()) require.Empty(t, diags) From 875b112f801c8b04694e077cc07ed88a335db31b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 06:07:01 +0000 Subject: [PATCH 16/21] Bump golang.org/x/mod from 0.20.0 to 0.21.0 (#1758) Bumps [golang.org/x/mod](https://github.com/golang/mod) from 0.20.0 to 0.21.0.
Commits:
- 46a3137 zip: set GIT_DIR in test when using bare repositories
- 3afcd4e go.mod: set go version to 1.22.0
- b1d336c go.mod: update required go version to go1.22
- See full diff in compare view
--------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester Co-authored-by: Pieter Noordhuis --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e1c7519fd..b6478a915 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/spf13/pflag v1.0.5 // BSD-3-Clause github.com/stretchr/testify v1.9.0 // MIT golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 - golang.org/x/mod v0.20.0 + golang.org/x/mod v0.21.0 golang.org/x/oauth2 v0.23.0 golang.org/x/sync v0.8.0 golang.org/x/term v0.24.0 diff --git a/go.sum b/go.sum index 3d4a2cdce..80fa43fdd 100644 --- a/go.sum +++ b/go.sum @@ -180,8 +180,8 @@ golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= From 94d8c3ba1e18abb82ca4dde85210cc1d2134f303 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 06:29:34 +0000 Subject: [PATCH 17/21] Bump github.com/hashicorp/hc-install from 0.7.0 to 0.9.0 (#1772) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/hashicorp/hc-install](https://github.com/hashicorp/hc-install) from 0.7.0 to 0.9.0.
Release notes

Sourced from github.com/hashicorp/hc-install's releases.

v0.9.0
Full Changelog: https://github.com/hashicorp/hc-install/compare/v0.8.1...v0.9.0

v0.8.1
Full Changelog: https://github.com/hashicorp/hc-install/compare/v0.8.0...v0.8.1

v0.8.0
ENHANCEMENTS / BUG FIXES / INTERNAL (details truncated)
Commits:
- 157a802 Merge pull request #250 from hashicorp/release-0.9.0
- 4c734fc Prepare for v0.9.0 release
- d78b328 Merge pull request #249 from hashicorp/d-contributing-md-update
- 34f38b0 docs: Update release instructions
- 6a5aa83 build(deps): bump golang.org/x/mod from 0.20.0 to 0.21.0 (#242)
- 1784fcc Merge pull request #248 from hashicorp/revert-version-contents
- ea2c69b Finish Release of 0.8.1 by updating VERSION
- 4f3e00e Releasing 0.8.1
- c6d1ced Merge pull request #246 from hashicorp/update-contributing
- eea12f1 Update CONTRIBUTING.md to add clean up step
- Additional commits viewable in compare view
Most Recent Ignore Conditions Applied to This Pull Request

| Dependency Name | Ignore Conditions |
| --- | --- |
| github.com/hashicorp/hc-install | [>= 0.8.a, < 0.9] |
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 3 ++- go.sum | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b6478a915..0cf3ef8a7 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/ghodss/yaml v1.0.0 // MIT + NOTICE github.com/google/uuid v1.6.0 // BSD-3-Clause github.com/hashicorp/go-version v1.7.0 // MPL 2.0 - github.com/hashicorp/hc-install v0.7.0 // MPL 2.0 + github.com/hashicorp/hc-install v0.9.0 // MPL 2.0 github.com/hashicorp/terraform-exec v0.21.0 // MPL 2.0 github.com/hashicorp/terraform-json v0.22.1 // MPL 2.0 github.com/manifoldco/promptui v0.9.0 // BSD-3-Clause @@ -51,6 +51,7 @@ require ( github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect + github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect diff --git a/go.sum b/go.sum index 80fa43fdd..d88667751 100644 --- a/go.sum +++ b/go.sum @@ -99,10 +99,14 @@ github.com/googleapis/gax-go/v2 v2.12.4 h1:9gWcmF85Wvq4ryPFvGFaOgPIs1AQX0d0bcbGw github.com/googleapis/gax-go/v2 v2.12.4/go.mod h1:KYEYLorsnIGDi/rPC8b5TdlB9kbKoFubselGIoBMCwI= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/hc-install v0.7.0 h1:Uu9edVqjKQxxuD28mR5TikkKDd/p55S8vzPC1659aBk= -github.com/hashicorp/hc-install v0.7.0/go.mod h1:ELmmzZlGnEcqoUMKUuykHaPCIR1sYLYX+KSggWSKZuA= +github.com/hashicorp/hc-install v0.9.0 h1:2dIk8LcvANwtv3QZLckxcjyF5w8KVtiMxu6G6eLhghE= +github.com/hashicorp/hc-install v0.9.0/go.mod h1:+6vOP+mf3tuGgMApVYtmsnDoKWMDcFXeTxCACYZ8SFg= github.com/hashicorp/terraform-exec v0.21.0 h1:uNkLAe95ey5Uux6KJdua6+cv8asgILFVWkd/RG0D2XQ= github.com/hashicorp/terraform-exec v0.21.0/go.mod h1:1PPeMYou+KDUSSeRE9szMZ/oHf4fYUmB923Wzbq1ICg= github.com/hashicorp/terraform-json v0.22.1 h1:xft84GZR0QzjPVWs4lRUwvTcPnegqlyS7orfb5Ltvec= From 66f2ba64a8a479d45efb9b23eab096a5ffda1367 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Thu, 26 Sep 2024 14:55:07 +0200 Subject: [PATCH 18/21] Simplified isFullVariableOverrideDef implementation (#1791) ## Changes Simplified isFullVariableOverrideDef implementation Follow up on https://github.com/databricks/cli/pull/1787 --- bundle/config/root.go | 54 ++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/bundle/config/root.go b/bundle/config/root.go index ff169e4ce..4b1467456 100644 --- a/bundle/config/root.go +++ b/bundle/config/root.go @@ -406,7 +406,14 @@ func (r *Root) MergeTargetOverrides(name string) error { return r.updateWithDynamicValue(root) } -var 
variableKeywords = []string{"default", "lookup"} +var allowedVariableDefinitions = []([]string){ + {"default", "type", "description"}, + {"default", "type"}, + {"default", "description"}, + {"lookup", "description"}, + {"default"}, + {"lookup"}, +} // isFullVariableOverrideDef checks if the given value is a full syntax varaible override. // A full syntax variable override is a map with either 1 of 2 keys. @@ -423,42 +430,21 @@ func isFullVariableOverrideDef(v dyn.Value) bool { return false } - // If the map has 3 keys, they should be "description", "type" and "default" or "lookup" - if mv.Len() == 3 { - if _, ok := mv.GetByString("type"); ok { - if _, ok := mv.GetByString("description"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } + for _, keys := range allowedVariableDefinitions { + if len(keys) != mv.Len() { + continue + } + + // Check if the keys are the same. + match := true + for _, key := range keys { + if _, ok := mv.GetByString(key); !ok { + match = false + break } } - return false - } - - // If the map has 2 keys, one of them should be "default" or "lookup" and the other is "type" or "description" - if mv.Len() == 2 { - if _, ok := mv.GetByString("type"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } - } - - if _, ok := mv.GetByString("description"); ok { - if _, ok := mv.GetByString("default"); ok { - return true - } - - if _, ok := mv.GetByString("lookup"); ok { - return true - } - } - - return false - } - - for _, keyword := range variableKeywords { - if _, ok := mv.GetByString(keyword); ok { + if match { return true } } From 4e8e02738081b74017bcb9d7b440e75ffa08c0d7 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:52:22 +0530 Subject: [PATCH 19/21] Sort tasks by `task_key` before generating the Terraform configuration (#1776) ## Changes Sort the tasks in the resultant `bundle.tf.json`. This is important because configuration from one task can leak into another if the tasks are not sorted. For more details see: 1. https://github.com/databricks/terraform-provider-databricks/issues/3951 2. https://github.com/databricks/terraform-provider-databricks/issues/4011 ## Tests Unit test and manual testing. For manual testing I used the following configuration: ``` resources: jobs: foo: tasks: - task_key: task-Z notebook_task: notebook_path: nb.py source: GIT existing_cluster_id: 0715-133738-ju0ma84z - task_key: task-1 notebook_task: notebook_path: ${workspace.file_path}/local.py source: WORKSPACE existing_cluster_id: 0715-133738-ju0ma84z depends_on: - task_key: task-Z git_source: git_provider: gitHub git_url: https://github.com/shreyas-goenka/job-source-tmp.git git_branch: main ``` Steps (1): 1. Deploy this bundle. 2. Comment out "source: GIT" 3. Deploy again Before: Deploying this bundle twice would fail. This is because the "source: GIT" would carry over to the next deployment. After: There was no error on the subsequent deployment. Steps (2): 1. Deploy once 2. Deploy again Before: Works correctly but leads to an update API call every time. After: No diff is detected by terraform.
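For illustration, a minimal standalone sketch of the ordering rule (here `Task` is a hypothetical stand-in for the SDK's `jobs.Task`, reduced to the one field that matters; tasks without a key naturally sort first):

```
package main

import (
	"fmt"
	"sort"
)

// Task is a stand-in for the SDK's jobs.Task, reduced to the key used
// for ordering.
type Task struct {
	TaskKey string
}

func main() {
	tasks := []Task{{TaskKey: "task-Z"}, {TaskKey: ""}, {TaskKey: "task-1"}}
	// Sorting by task key keeps the generated Terraform configuration
	// stable across deployments; the empty key sorts first.
	sort.Slice(tasks, func(i, j int) bool {
		return tasks[i].TaskKey < tasks[j].TaskKey
	})
	fmt.Println(tasks) // [{} {task-1} {task-Z}]
}
```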
--- bundle/deploy/terraform/convert.go | 5 +++ bundle/deploy/terraform/tfdyn/convert_job.go | 33 ++++++++++++++++++- .../terraform/tfdyn/convert_job_test.go | 30 ++++++++++++++--- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index 5a548e3b5..b8993c031 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "sort" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" @@ -82,6 +83,10 @@ func BundleToTerraform(config *config.Root) *schema.Root { conv(src, &dst) if src.JobSettings != nil { + sort.Slice(src.JobSettings.Tasks, func(i, j int) bool { + return src.JobSettings.Tasks[i].TaskKey < src.JobSettings.Tasks[j].TaskKey + }) + for _, v := range src.Tasks { var t schema.ResourceJobTask conv(v, &t) diff --git a/bundle/deploy/terraform/tfdyn/convert_job.go b/bundle/deploy/terraform/tfdyn/convert_job.go index d1e7e73e2..8948e3baf 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job.go +++ b/bundle/deploy/terraform/tfdyn/convert_job.go @@ -3,6 +3,7 @@ package tfdyn import ( "context" "fmt" + "sort" "github.com/databricks/cli/bundle/internal/tf/schema" "github.com/databricks/cli/libs/dyn" @@ -19,8 +20,38 @@ func convertJobResource(ctx context.Context, vin dyn.Value) (dyn.Value, error) { log.Debugf(ctx, "job normalization diagnostic: %s", diag.Summary) } + // Sort the tasks of each job in the bundle by task key. Sorting + // the task keys ensures that the diff computed by terraform is correct and avoids + // recreates. For more details see the NOTE at + // https://registry.terraform.io/providers/databricks/databricks/latest/docs/resources/job#example-usage + // and https://github.com/databricks/terraform-provider-databricks/issues/4011 + // and https://github.com/databricks/cli/pull/1776 + vout := vin + var err error + tasks, ok := vin.Get("tasks").AsSequence() + if ok { + sort.Slice(tasks, func(i, j int) bool { + // We sort the tasks by their task key. Tasks without task keys are ordered + // before tasks with task keys. We do not error for those tasks + // since presence of a task_key is validated for in the Jobs backend. + tk1, ok := tasks[i].Get("task_key").AsString() + if !ok { + return true + } + tk2, ok := tasks[j].Get("task_key").AsString() + if !ok { + return false + } + return tk1 < tk2 + }) + vout, err = dyn.Set(vin, "tasks", dyn.V(tasks)) + if err != nil { + return dyn.InvalidValue, err + } + } + // Modify top-level keys. 
- vout, err := renameKeys(vin, map[string]string{ + vout, err = renameKeys(vout, map[string]string{ "tasks": "task", "job_clusters": "job_cluster", "parameters": "parameter", diff --git a/bundle/deploy/terraform/tfdyn/convert_job_test.go b/bundle/deploy/terraform/tfdyn/convert_job_test.go index b9e1f967f..695b9ba24 100644 --- a/bundle/deploy/terraform/tfdyn/convert_job_test.go +++ b/bundle/deploy/terraform/tfdyn/convert_job_test.go @@ -42,8 +42,8 @@ func TestConvertJob(t *testing.T) { }, Tasks: []jobs.Task{ { - TaskKey: "task_key", - JobClusterKey: "job_cluster_key", + TaskKey: "task_key_b", + JobClusterKey: "job_cluster_key_b", Libraries: []compute.Library{ { Pypi: &compute.PythonPyPiLibrary{ @@ -55,6 +55,17 @@ func TestConvertJob(t *testing.T) { }, }, }, + { + TaskKey: "task_key_a", + JobClusterKey: "job_cluster_key_a", + }, + { + TaskKey: "task_key_c", + JobClusterKey: "job_cluster_key_c", + }, + { + Description: "missing task key 😱", + }, }, }, Permissions: []resources.Permission{ @@ -100,8 +111,15 @@ func TestConvertJob(t *testing.T) { }, "task": []any{ map[string]any{ - "task_key": "task_key", - "job_cluster_key": "job_cluster_key", + "description": "missing task key 😱", + }, + map[string]any{ + "task_key": "task_key_a", + "job_cluster_key": "job_cluster_key_a", + }, + map[string]any{ + "task_key": "task_key_b", + "job_cluster_key": "job_cluster_key_b", "library": []any{ map[string]any{ "pypi": map[string]any{ @@ -113,6 +131,10 @@ func TestConvertJob(t *testing.T) { }, }, }, + map[string]any{ + "task_key": "task_key_c", + "job_cluster_key": "job_cluster_key_c", + }, }, }, out.Job["my_job"]) From a1dca56abfb16879d55e113d98735e10a97f558e Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 27 Sep 2024 11:30:39 +0200 Subject: [PATCH 20/21] Trim trailing whitespace (#1794) ## Changes Trailing whitespace is trimmed per the VS Code settings for this repository. ## Tests n/a --- bundle/config/mutator/python/python_mutator.go | 4 ++-- bundle/config/mutator/python/python_mutator_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index fbf3b7e0b..3d4a502f7 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -228,12 +228,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return output, pythonDiagnostics } -const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +const installExplanation = `If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, and that the wheel is installed in the Python environment: $ .venv/bin/pip install -e . -If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index bf12b2499..7a419d799 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -570,12 +570,12 @@ func TestExplainProcessErr(t *testing.T) { Explanation: 'databricks-pydabs' library is not installed in the Python environment. 
-If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, +If using Python wheels, ensure that 'databricks-pydabs' is included in the dependencies, and that the wheel is installed in the Python environment: $ .venv/bin/pip install -e . -If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, +If using a virtual environment, ensure it is specified as the venv_path property in databricks.yml, or activate the environment before running CLI commands: experimental: From 56cd96cb939207df3403546e998579a4cc768cf6 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 27 Sep 2024 11:32:54 +0200 Subject: [PATCH 21/21] Move trampoline code into trampoline package (#1793) ## Changes Doing this to make room for PyDABs under `bundle/python`. ## Tests n/a --- bundle/phases/deploy.go | 4 ++-- bundle/phases/initialize.go | 4 ++-- .../conditional_transform_test.go | 2 +- .../warning.go => trampoline/python_dbr_warning.go} | 2 +- .../python_dbr_warning_test.go} | 2 +- .../transform.go => trampoline/python_wheel.go} | 10 +++++----- .../python_wheel_test.go} | 2 +- bundle/{config/mutator => trampoline}/trampoline.go | 3 ++- .../{config/mutator => trampoline}/trampoline_test.go | 2 +- 9 files changed, 16 insertions(+), 15 deletions(-) rename bundle/{python => trampoline}/conditional_transform_test.go (99%) rename bundle/{python/warning.go => trampoline/python_dbr_warning.go} (99%) rename bundle/{python/warning_test.go => trampoline/python_dbr_warning_test.go} (99%) rename bundle/{python/transform.go => trampoline/python_wheel.go} (94%) rename bundle/{python/transform_test.go => trampoline/python_wheel_test.go} (99%) rename bundle/{config/mutator => trampoline}/trampoline.go (99%) rename bundle/{config/mutator => trampoline}/trampoline_test.go (99%) diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index 097c561eb..cb0ecf75d 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -15,8 +15,8 @@ import ( "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/permissions" - "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/bundle/trampoline" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/sync" terraformlib "github.com/databricks/cli/libs/terraform" @@ -157,7 +157,7 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator { artifacts.CleanUp(), libraries.ExpandGlobReferences(), libraries.Upload(), - python.TransformWheelTask(), + trampoline.TransformWheelTask(), files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 8039a4f13..93ce61b25 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -9,8 +9,8 @@ import ( "github.com/databricks/cli/bundle/deploy/metadata" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/permissions" - "github.com/databricks/cli/bundle/python" "github.com/databricks/cli/bundle/scripts" + "github.com/databricks/cli/bundle/trampoline" ) // The initialize phase fills in defaults and connects to the workspace. 
@@ -66,7 +66,7 @@ func Initialize() bundle.Mutator { mutator.ConfigureWSFS(), mutator.TranslatePaths(), - python.WrapperWarning(), + trampoline.WrapperWarning(), permissions.ApplyBundlePermissions(), permissions.FilterCurrentUser(), metadata.AnnotateJobs(), diff --git a/bundle/python/conditional_transform_test.go b/bundle/trampoline/conditional_transform_test.go similarity index 99% rename from bundle/python/conditional_transform_test.go rename to bundle/trampoline/conditional_transform_test.go index 1d397f7a7..26e67154e 100644 --- a/bundle/python/conditional_transform_test.go +++ b/bundle/trampoline/conditional_transform_test.go @@ -1,4 +1,4 @@ -package python +package trampoline import ( "context" diff --git a/bundle/python/warning.go b/bundle/trampoline/python_dbr_warning.go similarity index 99% rename from bundle/python/warning.go rename to bundle/trampoline/python_dbr_warning.go index 0e9d8bef0..f62e9eab4 100644 --- a/bundle/python/warning.go +++ b/bundle/trampoline/python_dbr_warning.go @@ -1,4 +1,4 @@ -package python +package trampoline import ( "context" diff --git a/bundle/python/warning_test.go b/bundle/trampoline/python_dbr_warning_test.go similarity index 99% rename from bundle/python/warning_test.go rename to bundle/trampoline/python_dbr_warning_test.go index a5ab75632..d293c9477 100644 --- a/bundle/python/warning_test.go +++ b/bundle/trampoline/python_dbr_warning_test.go @@ -1,4 +1,4 @@ -package python +package trampoline import ( "context" diff --git a/bundle/python/transform.go b/bundle/trampoline/python_wheel.go similarity index 94% rename from bundle/python/transform.go rename to bundle/trampoline/python_wheel.go index 9d3b1ab6a..8e309a625 100644 --- a/bundle/python/transform.go +++ b/bundle/trampoline/python_wheel.go @@ -1,4 +1,4 @@ -package python +package trampoline import ( "context" @@ -69,7 +69,7 @@ func TransformWheelTask() bundle.Mutator { res := b.Config.Experimental != nil && b.Config.Experimental.PythonWheelWrapper return res, nil }, - mutator.NewTrampoline( + NewTrampoline( "python_wheel", &pythonTrampoline{}, NOTEBOOK_TEMPLATE, @@ -94,9 +94,9 @@ func (t *pythonTrampoline) CleanUp(task *jobs.Task) error { return nil } -func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { +func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []TaskWithJobKey { r := b.Config.Resources - result := make([]mutator.TaskWithJobKey, 0) + result := make([]TaskWithJobKey, 0) for k := range b.Config.Resources.Jobs { tasks := r.Jobs[k].JobSettings.Tasks for i := range tasks { @@ -110,7 +110,7 @@ func (t *pythonTrampoline) GetTasks(b *bundle.Bundle) []mutator.TaskWithJobKey { continue } - result = append(result, mutator.TaskWithJobKey{ + result = append(result, TaskWithJobKey{ JobKey: k, Task: task, }) diff --git a/bundle/python/transform_test.go b/bundle/trampoline/python_wheel_test.go similarity index 99% rename from bundle/python/transform_test.go rename to bundle/trampoline/python_wheel_test.go index c7bddca14..40c3b38f3 100644 --- a/bundle/python/transform_test.go +++ b/bundle/trampoline/python_wheel_test.go @@ -1,4 +1,4 @@ -package python +package trampoline import ( "context" diff --git a/bundle/config/mutator/trampoline.go b/bundle/trampoline/trampoline.go similarity index 99% rename from bundle/config/mutator/trampoline.go rename to bundle/trampoline/trampoline.go index dcca50149..1dc1c4463 100644 --- a/bundle/config/mutator/trampoline.go +++ b/bundle/trampoline/trampoline.go @@ -1,4 +1,4 @@ -package mutator +package trampoline import ( "context" @@ 
-23,6 +23,7 @@ type TrampolineFunctions interface { GetTasks(b *bundle.Bundle) []TaskWithJobKey CleanUp(task *jobs.Task) error } + type trampoline struct { name string functions TrampolineFunctions diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/trampoline/trampoline_test.go similarity index 99% rename from bundle/config/mutator/trampoline_test.go rename to bundle/trampoline/trampoline_test.go index 08d3c8220..08a290f93 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/trampoline/trampoline_test.go @@ -1,4 +1,4 @@ -package mutator +package trampoline import ( "context"